bug1739761.patch (19445B)
1 diff --git a/src/hashmgr.cxx b/src/hashmgr.cxx 2 --- a/src/hashmgr.cxx 3 +++ b/src/hashmgr.cxx 4 @@ -63,16 +63,17 @@ 5 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 6 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 7 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 8 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 9 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 10 * SUCH DAMAGE. 11 */ 12 13 +#include <assert.h> 14 #include <stdlib.h> 15 #include <string.h> 16 #include <stdio.h> 17 #include <ctype.h> 18 #include <limits> 19 #include <sstream> 20 21 #include "hashmgr.hxx" 22 @@ -118,52 +119,54 @@ HashMgr::~HashMgr() { 23 // go through column by column of the table 24 for (int i = 0; i < tablesize; i++) { 25 struct hentry* pt = tableptr[i]; 26 struct hentry* nt = NULL; 27 while (pt) { 28 nt = pt->next; 29 if (pt->astr && 30 (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) 31 - free(pt->astr); 32 - free(pt); 33 + arena_free(pt->astr); 34 + arena_free(pt); 35 pt = nt; 36 } 37 } 38 free(tableptr); 39 } 40 tablesize = 0; 41 42 if (aliasf) { 43 for (int j = 0; j < (numaliasf); j++) 44 - free(aliasf[j]); 45 - free(aliasf); 46 + arena_free(aliasf[j]); 47 + arena_free(aliasf); 48 aliasf = NULL; 49 if (aliasflen) { 50 - free(aliasflen); 51 + arena_free(aliasflen); 52 aliasflen = NULL; 53 } 54 } 55 if (aliasm) { 56 for (int j = 0; j < (numaliasm); j++) 57 - free(aliasm[j]); 58 - free(aliasm); 59 + arena_free(aliasm[j]); 60 + arena_free(aliasm); 61 aliasm = NULL; 62 } 63 64 #ifndef OPENOFFICEORG 65 #ifndef MOZILLA_CLIENT 66 if (utf8) 67 free_utf_tbl(); 68 #endif 69 #endif 70 71 #ifdef MOZILLA_CLIENT 72 delete[] csconv; 73 #endif 74 + 75 + assert(outstanding_arena_allocations == 0); 76 } 77 78 // lookup a root word in the hashtable 79 80 struct hentry* HashMgr::lookup(const char* word) const { 81 struct hentry* dp; 82 if (tableptr) { 83 dp = tableptr[hash(word)]; 84 @@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string& 85 86 word = word_copy; 87 } 88 89 bool upcasehomonym = false; 90 int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0; 91 // variable-length hash record with word and optional fields 92 struct hentry* hp = 93 - (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl); 94 + (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); 95 if (!hp) { 96 delete desc_copy; 97 delete word_copy; 98 return 1; 99 } 100 101 char* hpw = hp->word; 102 strcpy(hpw, word->c_str()); 103 @@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string& 104 delete word_copy; 105 return 0; 106 } 107 while (dp->next != NULL) { 108 if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { 109 // remove hidden onlyupcase homonym 110 if (!onlyupcase) { 111 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 112 - free(dp->astr); 113 + arena_free(dp->astr); 114 dp->astr = hp->astr; 115 dp->alen = hp->alen; 116 - free(hp); 117 + arena_free(hp); 118 delete desc_copy; 119 delete word_copy; 120 return 0; 121 } else { 122 dp->next_homonym = hp; 123 } 124 } else { 125 upcasehomonym = true; 126 } 127 } 128 dp = dp->next; 129 } 130 if (strcmp(hp->word, dp->word) == 0) { 131 // remove hidden onlyupcase homonym 132 if (!onlyupcase) { 133 if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { 134 - free(dp->astr); 135 + arena_free(dp->astr); 136 dp->astr = hp->astr; 137 dp->alen = hp->alen; 138 - free(hp); 139 + arena_free(hp); 140 delete desc_copy; 141 delete word_copy; 142 return 0; 143 } else { 144 dp->next_homonym = hp; 145 } 146 } else { 147 upcasehomonym = true; 148 } 149 } 150 if (!upcasehomonym) { 151 dp->next = hp; 152 } else { 153 // remove hidden onlyupcase homonym 154 if (hp->astr) 155 - free(hp->astr); 156 - free(hp); 157 + arena_free(hp->astr); 158 + arena_free(hp); 159 } 160 161 delete desc_copy; 162 delete word_copy; 163 return 0; 164 } 165 166 int HashMgr::add_hidden_capitalized_word(const std::string& word, 167 @@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word 168 169 // add inner capitalized forms to handle the following allcap forms: 170 // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG 171 // Allcaps with suffixes: CIA's -> CIA'S 172 if (((captype == HUHCAP) || (captype == HUHINITCAP) || 173 ((captype == ALLCAP) && (flagslen != 0))) && 174 !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { 175 unsigned short* flags2 = 176 - (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); 177 + (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); 178 if (!flags2) 179 return 1; 180 if (flagslen) 181 memcpy(flags2, flags, flagslen * sizeof(unsigned short)); 182 flags2[flagslen] = ONLYUPCASEFLAG; 183 if (utf8) { 184 std::string st; 185 std::vector<w_char> w; 186 @@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const 187 } 188 189 // remove word (personal dictionary function for standalone applications) 190 int HashMgr::remove(const std::string& word) { 191 struct hentry* dp = lookup(word.c_str()); 192 while (dp) { 193 if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { 194 unsigned short* flags = 195 - (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); 196 + (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); 197 if (!flags) 198 return 1; 199 for (int i = 0; i < dp->alen; i++) 200 flags[i] = dp->astr[i]; 201 flags[dp->alen] = forbiddenword; 202 - free(dp->astr); 203 + arena_free(dp->astr); 204 dp->astr = flags; 205 dp->alen++; 206 std::sort(flags, flags + dp->alen); 207 } 208 dp = dp->next_homonym; 209 } 210 return 0; 211 } 212 @@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s 213 remove_forbidden_flag(word); 214 if (dp && dp->astr) { 215 int captype; 216 int wcl = get_clen_and_captype(word, &captype); 217 if (aliasf) { 218 add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); 219 } else { 220 unsigned short* flags = 221 - (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); 222 + (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); 223 if (flags) { 224 memcpy((void*)flags, (void*)dp->astr, 225 dp->alen * sizeof(unsigned short)); 226 add_word(word, wcl, flags, dp->alen, NULL, false, captype); 227 } else 228 return 1; 229 } 230 return add_hidden_capitalized_word(word, wcl, dp->astr, 231 @@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa 232 if (aliasf) { 233 int index = atoi(ap.c_str()); 234 al = get_aliasf(index, &flags, dict); 235 if (!al) { 236 HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", 237 dict->getlinenum()); 238 } 239 } else { 240 - al = decode_flags(&flags, ap.c_str(), dict); 241 + al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); 242 if (al == -1) { 243 HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); 244 delete dict; 245 return 6; 246 } 247 std::sort(flags, flags + al); 248 } 249 } else { 250 @@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons 251 hv = (hv << 8) | (*word++); 252 while (*word != 0) { 253 ROTATE(hv, ROTATE_LEN); 254 hv ^= (*word++); 255 } 256 return (unsigned long)hv % tablesize; 257 } 258 259 -int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { 260 +int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { 261 + auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; 262 int len; 263 if (flags.empty()) { 264 *result = NULL; 265 return 0; 266 } 267 switch (flag_mode) { 268 case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) 269 len = flags.size(); 270 if (len % 2 == 1) 271 HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", 272 af->getlinenum()); 273 len /= 2; 274 - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); 275 + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 276 if (!*result) 277 return -1; 278 for (int i = 0; i < len; i++) { 279 (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + 280 (unsigned char)flags[i * 2 + 1]; 281 } 282 break; 283 } 284 case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 285 // 23 233) 286 len = 1; 287 unsigned short* dest; 288 for (size_t i = 0; i < flags.size(); ++i) { 289 if (flags[i] == ',') 290 len++; 291 } 292 - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); 293 + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 294 if (!*result) 295 return -1; 296 dest = *result; 297 const char* src = flags.c_str(); 298 for (const char* p = src; *p; p++) { 299 if (*p == ',') { 300 int i = atoi(src); 301 if (i >= DEFAULTFLAGS) 302 @@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short 303 HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", 304 af->getlinenum()); 305 break; 306 } 307 case FLAG_UNI: { // UTF-8 characters 308 std::vector<w_char> w; 309 u8_u16(w, flags); 310 len = w.size(); 311 - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); 312 + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 313 if (!*result) 314 return -1; 315 memcpy(*result, w.data(), len * sizeof(short)); 316 break; 317 } 318 default: { // Ispell's one-character flags (erfg -> e r f g) 319 unsigned short* dest; 320 len = flags.size(); 321 - *result = (unsigned short*)malloc(len * sizeof(unsigned short)); 322 + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); 323 if (!*result) 324 return -1; 325 dest = *result; 326 for (size_t i = 0; i < flags.size(); ++i) { 327 *dest = (unsigned char)flags[i]; 328 dest++; 329 } 330 } 331 @@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons 332 default: 333 s = *(unsigned char*)f; 334 } 335 if (s == 0) 336 HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); 337 return s; 338 } 339 340 +// This function is only called by external consumers, and so using the default 341 +// allocator with mystrdup is correct. 342 char* HashMgr::encode_flag(unsigned short f) const { 343 if (f == 0) 344 return mystrdup("(NULL)"); 345 std::string ch; 346 if (flag_mode == FLAG_LONG) { 347 ch.push_back((unsigned char)(f >> 8)); 348 ch.push_back((unsigned char)(f - ((f >> 8) << 8))); 349 } else if (flag_mode == FLAG_NUM) { 350 @@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st 351 numaliasf = 0; 352 aliasf = NULL; 353 aliasflen = NULL; 354 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", 355 af->getlinenum()); 356 return false; 357 } 358 aliasf = 359 - (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); 360 + (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); 361 aliasflen = 362 - (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); 363 + (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); 364 if (!aliasf || !aliasflen) { 365 numaliasf = 0; 366 if (aliasf) 367 - free(aliasf); 368 + arena_free(aliasf); 369 if (aliasflen) 370 - free(aliasflen); 371 + arena_free(aliasflen); 372 aliasf = NULL; 373 aliasflen = NULL; 374 return false; 375 } 376 np++; 377 break; 378 } 379 default: 380 break; 381 } 382 ++i; 383 start_piece = mystrsep(line, iter); 384 } 385 if (np != 2) { 386 numaliasf = 0; 387 - free(aliasf); 388 - free(aliasflen); 389 + arena_free(aliasf); 390 + arena_free(aliasflen); 391 aliasf = NULL; 392 aliasflen = NULL; 393 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", 394 af->getlinenum()); 395 return false; 396 } 397 398 /* now parse the numaliasf lines to read in the remainder of the table */ 399 @@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st 400 errored = true; 401 break; 402 } 403 break; 404 } 405 case 1: { 406 std::string piece(start_piece, iter); 407 aliasflen[j] = 408 - (unsigned short)decode_flags(&(aliasf[j]), piece, af); 409 + (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); 410 std::sort(aliasf[j], aliasf[j] + aliasflen[j]); 411 break; 412 } 413 default: 414 break; 415 } 416 ++i; 417 start_piece = mystrsep(nl, iter); 418 } 419 } 420 if (!aliasf[j]) { 421 for (int k = 0; k < j; ++k) { 422 - free(aliasf[k]); 423 + arena_free(aliasf[k]); 424 } 425 - free(aliasf); 426 - free(aliasflen); 427 + arena_free(aliasf); 428 + arena_free(aliasflen); 429 aliasf = NULL; 430 aliasflen = NULL; 431 numaliasf = 0; 432 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", 433 af->getlinenum()); 434 return false; 435 } 436 } 437 @@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st 438 } 439 case 1: { 440 numaliasm = atoi(std::string(start_piece, iter).c_str()); 441 if (numaliasm < 1) { 442 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", 443 af->getlinenum()); 444 return false; 445 } 446 - aliasm = (char**)malloc(numaliasm * sizeof(char*)); 447 + aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); 448 if (!aliasm) { 449 numaliasm = 0; 450 return false; 451 } 452 np++; 453 break; 454 } 455 default: 456 break; 457 } 458 ++i; 459 start_piece = mystrsep(line, iter); 460 } 461 if (np != 2) { 462 numaliasm = 0; 463 - free(aliasm); 464 + arena_free(aliasm); 465 aliasm = NULL; 466 HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", 467 af->getlinenum()); 468 return false; 469 } 470 471 /* now parse the numaliasm lines to read in the remainder of the table */ 472 for (int j = 0; j < numaliasm; j++) { 473 @@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st 474 std::string::const_iterator end = nl.end(); 475 std::string chunk(start_piece, end); 476 if (complexprefixes) { 477 if (utf8) 478 reverseword_utf(chunk); 479 else 480 reverseword(chunk); 481 } 482 - aliasm[j] = mystrdup(chunk.c_str()); 483 + size_t sl = chunk.length() + 1; 484 + aliasm[j] = (char*)arena_alloc(sl); 485 + if (aliasm[j]) { 486 + memcpy(aliasm[j], chunk.c_str(), sl); 487 + } 488 break; 489 } 490 default: 491 break; 492 } 493 ++i; 494 start_piece = mystrsep(nl, iter); 495 } 496 } 497 if (!aliasm[j]) { 498 numaliasm = 0; 499 for (int k = 0; k < j; ++k) { 500 - free(aliasm[k]); 501 + arena_free(aliasm[k]); 502 } 503 - free(aliasm); 504 + arena_free(aliasm); 505 aliasm = NULL; 506 HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", 507 af->getlinenum()); 508 return false; 509 } 510 } 511 return true; 512 } 513 @@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std:: 514 } 515 return true; 516 } 517 518 // return replacing table 519 const std::vector<replentry>& HashMgr::get_reptable() const { 520 return reptable; 521 } 522 + 523 +void* HashMgr::arena_alloc(int num_bytes) { 524 + static const int MIN_CHUNK_SIZE = 4096; 525 + if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { 526 + current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); 527 + arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); 528 + current_chunk_offset = 0; 529 + } 530 + 531 + uint8_t* ptr = &arena.back()[current_chunk_offset]; 532 + current_chunk_offset += num_bytes; 533 + outstanding_arena_allocations++; 534 + return ptr; 535 +} 536 + 537 +void HashMgr::arena_free(void* ptr) { 538 + --outstanding_arena_allocations; 539 + assert(outstanding_arena_allocations >= 0); 540 +} 541 diff --git a/src/hashmgr.hxx b/src/hashmgr.hxx 542 --- a/src/hashmgr.hxx 543 +++ b/src/hashmgr.hxx 544 @@ -67,16 +67,18 @@ 545 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 546 * SUCH DAMAGE. 547 */ 548 549 #ifndef HASHMGR_HXX_ 550 #define HASHMGR_HXX_ 551 552 #include <stdio.h> 553 +#include <stdint.h> 554 +#include <memory> 555 #include <string> 556 #include <vector> 557 558 #include "htypes.hxx" 559 #include "filemgr.hxx" 560 #include "w_char.hxx" 561 562 enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; 563 @@ -116,17 +118,23 @@ class HashMgr { 564 565 struct hentry* lookup(const char*) const; 566 int hash(const char*) const; 567 struct hentry* walk_hashtable(int& col, struct hentry* hp) const; 568 569 int add(const std::string& word); 570 int add_with_affix(const std::string& word, const std::string& pattern); 571 int remove(const std::string& word); 572 - int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const; 573 +private: 574 + // Only internal consumers are allowed to arena-allocate. 575 + int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const; 576 +public: 577 + int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { 578 + return decode_flags(result, flags, af, /* arena = */ false); 579 + } 580 bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const; 581 unsigned short decode_flag(const char* flag) const; 582 char* encode_flag(unsigned short flag) const; 583 int is_aliasf() const; 584 int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; 585 int is_aliasm() const; 586 char* get_aliasm(int index) const; 587 const std::vector<replentry>& get_reptable() const; 588 @@ -148,11 +156,27 @@ class HashMgr { 589 int wcl, 590 unsigned short* flags, 591 int al, 592 const std::string* dp, 593 int captype); 594 bool parse_aliasm(const std::string& line, FileMgr* af); 595 bool parse_reptable(const std::string& line, FileMgr* af); 596 int remove_forbidden_flag(const std::string& word); 597 + 598 + // Our Mozilla fork uses a simple arena allocator for certain strings which 599 + // persist for the lifetime of the HashMgr in order to avoid heap fragmentation. 600 + // It's a simple bump-allocator, so we can't actually free() memory midway 601 + // through the lifecycle, but we have a dummy free() implementation to ensure 602 + // that our calls to arena_alloc() and arena_free() are balanced. 603 + void* arena_alloc(int num_bytes); 604 + void* arena_alloc(int num_bytes) const { 605 + return const_cast<HashMgr*>(this)->arena_alloc(num_bytes); 606 + } 607 + void arena_free(void* ptr); 608 + 609 + std::vector<std::unique_ptr<uint8_t[]>> arena; 610 + int current_chunk_size = 0; 611 + int current_chunk_offset = 0; 612 + int outstanding_arena_allocations = 0; 613 }; 614 615 #endif