spellfile.c (187236B)
1 // spellfile.c: code for reading and writing spell files. 2 // 3 // See spell.c for information about spell checking. 4 5 // Vim spell file format: <HEADER> 6 // <SECTIONS> 7 // <LWORDTREE> 8 // <KWORDTREE> 9 // <PREFIXTREE> 10 // 11 // <HEADER>: <fileID> <versionnr> 12 // 13 // <fileID> 8 bytes "VIMspell" 14 // <versionnr> 1 byte VIMSPELLVERSION 15 // 16 // 17 // Sections make it possible to add information to the .spl file without 18 // making it incompatible with previous versions. There are two kinds of 19 // sections: 20 // 1. Not essential for correct spell checking. E.g. for making suggestions. 21 // These are skipped when not supported. 22 // 2. Optional information, but essential for spell checking when present. 23 // E.g. conditions for affixes. When this section is present but not 24 // supported an error message is given. 25 // 26 // <SECTIONS>: <section> ... <sectionend> 27 // 28 // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 29 // 30 // <sectionID> 1 byte number from 0 to 254 identifying the section 31 // 32 // <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct 33 // spell checking 34 // 35 // <sectionlen> 4 bytes length of section contents, MSB first 36 // 37 // <sectionend> 1 byte SN_END 38 // 39 // 40 // sectionID == SN_INFO: <infotext> 41 // <infotext> N bytes free format text with spell file info (version, 42 // website, etc) 43 // 44 // sectionID == SN_REGION: <regionname> ... 45 // <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. 46 // Lower case. 47 // First <regionname> is region 1. 48 // 49 // sectionID == SN_CHARFLAGS: <charflagslen> <charflags> 50 // <folcharslen> <folchars> 51 // <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). 52 // <charflags> N bytes List of flags (first one is for character 128): 53 // 0x01 word character CF_WORD 54 // 0x02 upper-case character CF_UPPER 55 // <folcharslen> 2 bytes Number of bytes in <folchars>. 
56 // <folchars> N bytes Folded characters, first one is for character 128. 57 // 58 // sectionID == SN_MIDWORD: <midword> 59 // <midword> N bytes Characters that are word characters only when used 60 // in the middle of a word. 61 // 62 // sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... 63 // <prefcondcnt> 2 bytes Number of <prefcond> items following. 64 // <prefcond> : <condlen> <condstr> 65 // <condlen> 1 byte Length of <condstr>. 66 // <condstr> N bytes Condition for the prefix. 67 // 68 // sectionID == SN_REP: <repcount> <rep> ... 69 // <repcount> 2 bytes number of <rep> items, MSB first. 70 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 71 // <repfromlen> 1 byte length of <repfrom> 72 // <repfrom> N bytes "from" part of replacement 73 // <reptolen> 1 byte length of <repto> 74 // <repto> N bytes "to" part of replacement 75 // 76 // sectionID == SN_REPSAL: <repcount> <rep> ... 77 // just like SN_REP but for soundfolded words 78 // 79 // sectionID == SN_SAL: <salflags> <salcount> <sal> ... 80 // <salflags> 1 byte flags for soundsalike conversion: 81 // SAL_F0LLOWUP 82 // SAL_COLLAPSE 83 // SAL_REM_ACCENTS 84 // <salcount> 2 bytes number of <sal> items following 85 // <sal> : <salfromlen> <salfrom> <saltolen> <salto> 86 // <salfromlen> 1 byte length of <salfrom> 87 // <salfrom> N bytes "from" part of soundsalike 88 // <saltolen> 1 byte length of <salto> 89 // <salto> N bytes "to" part of soundsalike 90 // 91 // sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 92 // <sofofromlen> 2 bytes length of <sofofrom> 93 // <sofofrom> N bytes "from" part of soundfold 94 // <sofotolen> 2 bytes length of <sofoto> 95 // <sofoto> N bytes "to" part of soundfold 96 // 97 // sectionID == SN_SUGFILE: <timestamp> 98 // <timestamp> 8 bytes time in seconds that must match with .sug file 99 // 100 // sectionID == SN_NOSPLITSUGS: nothing 101 // 102 // sectionID == SN_NOCOMPOUNDSUGS: nothing 103 // 104 // sectionID == SN_WORDS: <word> ... 
105 // <word> N bytes NUL terminated common word 106 // 107 // sectionID == SN_MAP: <mapstr> 108 // <mapstr> N bytes String with sequences of similar characters, 109 // separated by slashes. 110 // 111 // sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> 112 // <comppatcount> <comppattern> ... <compflags> 113 // <compmax> 1 byte Maximum nr of words in compound word. 114 // <compminlen> 1 byte Minimal word length for compounding. 115 // <compsylmax> 1 byte Maximum nr of syllables in compound word. 116 // <compoptions> 2 bytes COMP_ flags. 117 // <comppatcount> 2 bytes number of <comppattern> following 118 // <compflags> N bytes Flags from COMPOUNDRULE items, separated by 119 // slashes. 120 // 121 // <comppattern>: <comppatlen> <comppattext> 122 // <comppatlen> 1 byte length of <comppattext> 123 // <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN 124 // 125 // sectionID == SN_NOBREAK: (empty, its presence is what matters) 126 // 127 // sectionID == SN_SYLLABLE: <syllable> 128 // <syllable> N bytes String from SYLLABLE item. 129 // 130 // <LWORDTREE>: <wordtree> 131 // 132 // <KWORDTREE>: <wordtree> 133 // 134 // <PREFIXTREE>: <wordtree> 135 // 136 // 137 // <wordtree>: <nodecount> <nodedata> ... 138 // 139 // <nodecount> 4 bytes Number of nodes following. MSB first. 140 // 141 // <nodedata>: <siblingcount> <sibling> ... 142 // 143 // <siblingcount> 1 byte Number of siblings in this node. The siblings 144 // follow in sorted order. 145 // 146 // <sibling>: <byte> [ <nodeidx> <xbyte> 147 // | <flags> [<flags2>] [<region>] [<affixID>] 148 // | [<pflags>] <affixID> <prefcondnr> ] 149 // 150 // <byte> 1 byte Byte value of the sibling. Special cases: 151 // BY_NOFLAGS: End of word without flags and for all 152 // regions. 153 // For PREFIXTREE <affixID> and 154 // <prefcondnr> follow. 155 // BY_FLAGS: End of word, <flags> follow. 156 // For PREFIXTREE <pflags>, <affixID> 157 // and <prefcondnr> follow. 
158 // BY_FLAGS2: End of word, <flags> and <flags2> 159 // follow. Not used in PREFIXTREE. 160 // BY_INDEX: Child of sibling is shared, <nodeidx> 161 // and <xbyte> follow. 162 // 163 // <nodeidx> 3 bytes Index of child for this sibling, MSB first. 164 // 165 // <xbyte> 1 byte Byte value of the sibling. 166 // 167 // <flags> 1 byte Bitmask of: 168 // WF_ALLCAP word must have only capitals 169 // WF_ONECAP first char of word must be capital 170 // WF_KEEPCAP keep-case word 171 // WF_FIXCAP keep-case word, all caps not allowed 172 // WF_RARE rare word 173 // WF_BANNED bad word 174 // WF_REGION <region> follows 175 // WF_AFX <affixID> follows 176 // 177 // <flags2> 1 byte Bitmask of: 178 // WF_HAS_AFF >> 8 word includes affix 179 // WF_NEEDCOMP >> 8 word only valid in compound 180 // WF_NOSUGGEST >> 8 word not used for suggestions 181 // WF_COMPROOT >> 8 word already a compound 182 // WF_NOCOMPBEF >> 8 no compounding before this word 183 // WF_NOCOMPAFT >> 8 no compounding after this word 184 // 185 // <pflags> 1 byte Bitmask of: 186 // WFP_RARE rare prefix 187 // WFP_NC non-combining prefix 188 // WFP_UP letter after prefix made upper case 189 // 190 // <region> 1 byte Bitmask for regions in which word is valid. When 191 // omitted it's valid in all regions. 192 // Lowest bit is for region 1. 193 // 194 // <affixID> 1 byte ID of affix that can be used with this word. In 195 // PREFIXTREE used for the required prefix ID. 196 // 197 // <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list 198 // from HEADER. 199 // 200 // All text characters are in 'encoding', but stored as single bytes. 
// Vim .sug file format:  <SUGHEADER>
//                        <SUGWORDTREE>
//                        <SUGTABLE>
//
// <SUGHEADER>: <fileID> <versionnr> <timestamp>
//
// <fileID>     6 bytes     "VIMsug"
// <versionnr>  1 byte      VIMSUGVERSION
// <timestamp>  8 bytes     timestamp that must match with .spl file
//
//
// <SUGWORDTREE>: <wordtree>  (see above, no flags or region used)
//
//
// <SUGTABLE>: <sugwcount> <sugline> ...
//
// <sugwcount>  4 bytes     number of <sugline> following
//
// <sugline>: <sugnr> ... NUL
//
// <sugnr>:     X bytes     word number that results in this soundfolded word,
//                          stored as an offset to the previous number in as
//                          few bytes as possible (see offset2bytes())

#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "nvim/arglist.h"
#include "nvim/ascii_defs.h"
#include "nvim/buffer.h"
#include "nvim/buffer_defs.h"
#include "nvim/charset.h"
#include "nvim/drawscreen.h"
#include "nvim/errors.h"
#include "nvim/ex_cmds_defs.h"
#include "nvim/fileio.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/gettext_defs.h"
#include "nvim/globals.h"
#include "nvim/hashtab.h"
#include "nvim/hashtab_defs.h"
#include "nvim/macros_defs.h"
#include "nvim/mbyte.h"
#include "nvim/mbyte_defs.h"
#include "nvim/memline.h"
#include "nvim/memory.h"
#include "nvim/message.h"
#include "nvim/option.h"
#include "nvim/option_defs.h"
#include "nvim/option_vars.h"
#include "nvim/os/fs.h"
#include "nvim/os/input.h"
#include "nvim/os/os.h"
#include "nvim/os/os_defs.h"
#include "nvim/os/stdpaths_defs.h"
#include "nvim/os/time.h"
#include "nvim/os/time_defs.h"
#include "nvim/path.h"
#include "nvim/pos_defs.h"
#include "nvim/regexp.h"
#include "nvim/runtime.h"
#include "nvim/runtime_defs.h"
#include "nvim/spell.h"
#include "nvim/spell_defs.h"
#include "nvim/spellfile.h"
#include "nvim/strings.h"
#include "nvim/types_defs.h"
#include "nvim/ui.h"
#include "nvim/undo.h"
#include "nvim/vim_defs.h"

// Special byte values for <byte>.  Some are only used in the tree for
// postponed prefixes, some only in the other trees.  This is a bit messy...
// These are the values a <sibling> <byte> can take in a <wordtree> to mark
// something other than an ordinary character.
enum {
  BY_NOFLAGS = 0,  // end of word without flags or region; for postponed prefix: no <pflags>
  BY_INDEX = 1,    // child is shared, index follows
  BY_FLAGS = 2,    // end of word, <flags> byte follows; for postponed prefix: <pflags> follows
  BY_FLAGS2 = 3,   // end of word, <flags> and <flags2> bytes follow; never used in prefix tree
  BY_SPECIAL = BY_FLAGS2,  // highest special byte value
};

#define ZERO_FLAG   65009       // used when flag is zero: "0"

// Flags used in .spl file for soundsalike flags.
// They are the bits of the <salflags> byte of the SN_SAL section.
// Note that SAL_F0LLOWUP is spelled with a digit zero, not the letter "O".
enum {
  SAL_F0LLOWUP = 1,
  SAL_COLLAPSE = 2,
  SAL_REM_ACCENTS = 4,
};

#define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
// Length of the magic string, not counting the terminating NUL.
#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1)
// Value of <versionnr>.  spell_load_file() accepts only exactly this version:
// smaller gives E771, larger gives E772.
#define VIMSPELLVERSION 50

// Section IDs.  Only renumber them when VIMSPELLVERSION changes!
enum {
  // Values of <sectionID> in the <SECTIONS> part of a .spl file.
  SN_REGION = 0,          // <regionname> section
  SN_CHARFLAGS = 1,       // charflags section
  SN_MIDWORD = 2,         // <midword> section
  SN_PREFCOND = 3,        // <prefcond> section
  SN_REP = 4,             // REP items section
  SN_SAL = 5,             // SAL items section
  SN_SOFO = 6,            // soundfolding section
  SN_MAP = 7,             // MAP items section
  SN_COMPOUND = 8,        // compound words section
  SN_SYLLABLE = 9,        // syllable section
  SN_NOBREAK = 10,        // NOBREAK section
  SN_SUGFILE = 11,        // timestamp for .sug file
  SN_REPSAL = 12,         // REPSAL items section
  SN_WORDS = 13,          // common words
  SN_NOSPLITSUGS = 14,    // don't split word for suggestions
  SN_INFO = 15,           // info section
  SN_NOCOMPOUNDSUGS = 16,  // don't compound for suggestions
  SN_END = 255,           // end of sections (<sectionend>)
};

#define SNF_REQUIRED 1    // <sectionflags>: required section

// Bits of one <charflags> byte in the SN_CHARFLAGS section.
enum {
  CF_WORD = 0x01,   // word character
  CF_UPPER = 0x02,  // upper-case character
};

// Message strings.  They are marked with N_() here; the visible call sites
// pass them through _() when giving the message.
static const char *e_spell_trunc = N_("E758: Truncated spell file");
static const char e_error_while_reading_sug_file_str[]
  = N_("E782: Error while reading .sug file: %s");
static const char e_duplicate_char_in_map_entry[]
  = N_("E783: Duplicate char in MAP entry");
static const char *e_illegal_character_in_word = N_("E1280: Illegal character in word");
static const char *e_afftrailing = N_("Trailing text in %s line %d: %s");
static const char *e_affname = N_("Affix name too long in %s line %d: %s");
static const char *msg_compressing = N_("Compressing word tree...");

#define MAXLINELEN  500         // Maximum length in bytes of a line in a .aff
                                // and .dic file.
// Main structure to store the contents of a ".aff" file.
typedef struct {
  char *af_enc;                 // "SET", normalized, alloc'ed string or NULL
  int af_flagtype;              // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
  unsigned af_rare;             // RARE ID for rare word
  unsigned af_keepcase;         // KEEPCASE ID for keep-case word
  unsigned af_bad;              // BAD ID for banned word
  unsigned af_needaffix;        // NEEDAFFIX ID
  unsigned af_circumfix;        // CIRCUMFIX ID
  unsigned af_needcomp;         // NEEDCOMPOUND ID
  unsigned af_comproot;         // COMPOUNDROOT ID
  unsigned af_compforbid;       // COMPOUNDFORBIDFLAG ID
  unsigned af_comppermit;       // COMPOUNDPERMITFLAG ID
  unsigned af_nosuggest;        // NOSUGGEST ID
  int af_pfxpostpone;           // postpone prefixes without chop string and
                                // without flags
  bool af_ignoreextra;          // IGNOREEXTRA present
  hashtab_T af_pref;            // hashtable for prefixes, affheader_T
  hashtab_T af_suff;            // hashtable for suffixes, affheader_T
  hashtab_T af_comp;            // hashtable for compound flags, compitem_T
} afffile_T;

// Possible values of afffile_T.af_flagtype.
#define AFT_CHAR        0       // flags are one character
#define AFT_LONG        1       // flags are two characters
#define AFT_CAPLONG     2       // flags are one or two characters
#define AFT_NUM         3       // flags are numbers, comma separated

typedef struct affentry_S affentry_T;
// Affix entry from ".aff" file.  Used for prefixes and suffixes.
// Entries with the same affix name form a singly linked list via "ae_next".
struct affentry_S {
  affentry_T *ae_next;         // next affix with same name/number
  char *ae_chop;                // text to chop off basic word (can be NULL)
  char *ae_add;                 // text to add to basic word (can be NULL)
  char *ae_flags;               // flags on the affix (can be NULL)
  char *ae_cond;                // condition (NULL for ".")
  regprog_T *ae_prog;           // regexp program for ae_cond or NULL
  char ae_compforbid;           // COMPOUNDFORBIDFLAG found
  char ae_comppermit;           // COMPOUNDPERMITFLAG found
};

// Size of the hashtab key buffers "ah_key" and "ci_key".
#define AH_KEY_LEN 17           // 2 x 8 bytes + NUL

// Affix header from ".aff" file.  Used for af_pref and af_suff.
typedef struct {
  // "ah_key" must stay the first member: HI2AH() casts the hashtab key
  // pointer straight to an affheader_T pointer.
  char ah_key[AH_KEY_LEN];      // key for hashtab == name of affix
  unsigned ah_flag;             // affix name as number, uses "af_flagtype"
  int ah_newID;                 // prefix ID after renumbering; 0 if not used
  int ah_combine;               // suffix may combine with prefix
  int ah_follows;               // another affix block should be following
  affentry_T *ah_first;         // first affix entry
} affheader_T;

// Get the affheader_T from a hashtab item; relies on "ah_key" being at
// offset zero.
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)

// Flag used in compound items.
typedef struct {
  // "ci_key" must stay the first member: HI2CI() casts the hashtab key
  // pointer straight to a compitem_T pointer.
  char ci_key[AH_KEY_LEN];      // key for hashtab == name of compound
  unsigned ci_flag;             // affix name as number, uses "af_flagtype"
  int ci_newID;                 // affix ID after renumbering.
} compitem_T;

// Get the compitem_T from a hashtab item; relies on "ci_key" being at
// offset zero.
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)

// Structure that is used to store the items in the word tree.  This avoids
// the need to keep track of each allocated thing, everything is freed all at
// once after ":mkspell" is done.
// Note: "sb_next" must be just before "sb_data" to make sure the alignment of
// "sb_data" is correct for systems where pointers must be aligned on
// pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
#define  SBLOCKSIZE 16000       // size of sb_data
typedef struct sblock_S sblock_T;
struct sblock_S {
  int sb_used;                  // nr of bytes already in use
  sblock_T *sb_next;            // next block in list
  char sb_data[];               // data (flexible array member)
};

// A node in the tree.
typedef struct wordnode_S wordnode_T;
struct wordnode_S {
  // "wn_u1" must stay the first member: HI2WN() casts the hashtab key pointer
  // straight to a wordnode_T pointer, so the key ("hashkey") has to be at
  // offset zero.
  union {   // shared to save space
    uint8_t hashkey[6];         // the hash key, only used while compressing
    int index;                  // index in written nodes (valid after first
                                // round)
  } wn_u1;
  union {   // shared to save space
    wordnode_T *next;           // next node with same hash key
    wordnode_T *wnode;          // parent node that will write this node
  } wn_u2;
  wordnode_T *wn_child;         // child (next byte in word)
  wordnode_T *wn_sibling;       // next sibling (alternate byte in word,
                                // always sorted)
  int wn_refs;                  // Nr. of references to this node.  Only
                                // relevant for first node in a list of
                                // siblings, in following siblings it is
                                // always one.
  uint8_t wn_byte;              // Byte for this node. NUL for word end

  // Info for when "wn_byte" is NUL.
  // In PREFIXTREE "wn_region" is used for the prefcondnr.
  // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
  // "wn_region" the LSW of the wordnr.
  uint8_t wn_affixID;           // supported/required prefix ID or 0
  uint16_t wn_flags;            // WF_ flags
  int16_t wn_region;            // region mask

#ifdef SPELL_PRINTTREE
  int wn_nr;                    // sequence nr for printing
#endif
};

#define WN_MASK  0xffff         // mask relevant bits of "wn_flags"

// Get the wordnode_T from a hashtab item; relies on the hash key being at
// offset zero of the node.
#define HI2WN(hi)    (wordnode_T *)((hi)->hi_key)

// Info used while reading the spell files.
typedef struct {
  wordnode_T *si_foldroot;     // tree with case-folded words
  int si_foldwcount;            // nr of words in si_foldroot

  wordnode_T *si_keeproot;     // tree with keep-case words
  int si_keepwcount;            // nr of words in si_keeproot

  wordnode_T *si_prefroot;     // tree with postponed prefixes

  int si_sugtree;               // creating the soundfolding trie

  sblock_T *si_blocks;         // memory blocks used
  int si_blocks_cnt;            // memory blocks allocated
  int si_did_emsg;              // true when ran out of memory

  int si_compress_cnt;          // words to add before lowering
                                // compression limit
  wordnode_T *si_first_free;   // List of nodes that have been freed during
                                // compression, linked by "wn_child" field.
  int si_free_count;            // number of nodes in si_first_free
#ifdef SPELL_PRINTTREE
  int si_wordnode_nr;           // sequence nr for nodes
#endif
  buf_T *si_spellbuf;          // buffer used to store soundfold word table

  int si_ascii;                 // handling only ASCII words
  int si_add;                   // addition file
  int si_clear_chartab;         // when true clear char tables
  int si_region;                // region mask
  vimconv_T si_conv;            // for conversion to 'encoding'
  int si_memtot;                // runtime memory used
  int si_verbose;               // verbose messages
  int si_msg_count;             // number of words added since last message
  char *si_info;                // info text chars or NULL
  int si_region_count;          // number of regions supported (1 when there
                                // are no regions)
  char si_region_name[MAXREGIONS * 2 + 1];
                                // region names (two bytes each plus a NUL);
                                // used only if si_region_count > 1

  garray_T si_rep;              // list of fromto_T entries from REP lines
  garray_T si_repsal;           // list of fromto_T entries from REPSAL lines
  garray_T si_sal;              // list of fromto_T entries from SAL lines
  char *si_sofofr;              // SOFOFROM text
  char *si_sofoto;              // SOFOTO text
  int si_nosugfile;             // NOSUGFILE item found
  int si_nosplitsugs;           // NOSPLITSUGS item found
  int si_nocompoundsugs;        // NOCOMPOUNDSUGS item found
  int si_followup;              // soundsalike: ?
                                // NOTE(review): presumably the source of the
                                // SAL_F0LLOWUP bit - confirm at the writer
  int si_collapse;              // soundsalike: ?
                                // NOTE(review): presumably the source of the
                                // SAL_COLLAPSE bit - confirm at the writer
  hashtab_T si_commonwords;     // hashtable for common words
  time_t si_sugtime;            // timestamp for .sug file
  int si_rem_accents;           // soundsalike: remove accents
  garray_T si_map;              // MAP info concatenated
  char *si_midword;             // MIDWORD chars or NULL
  int si_compmax;               // max nr of words for compounding
  int si_compminlen;            // minimal length for compounding
  int si_compsylmax;            // max nr of syllables for compounding
  int si_compoptions;           // COMP_ flags
  garray_T si_comppat;          // CHECKCOMPOUNDPATTERN items, each stored as
                                // a string
  char *si_compflags;           // flags used for compounding
  char si_nobreak;              // NOBREAK
  char *si_syllable;            // syllable string
  garray_T si_prefcond;         // table with conditions for postponed
                                // prefixes, each stored as a string
  int si_newprefID;             // current value for ah_newID
  int si_newcompID;             // current value for compound ID
} spellinfo_T;

#include "spellfile.c.generated.h"

/// Read n bytes from fd to buf, returning on errors
///
/// This macro contains a `return` statement: on a short read it returns from
/// the *enclosing function*, so it may only be used inside functions that
/// return an int SP_*ERROR code.  Each argument is evaluated exactly once
/// (they are copied into locals first), except "exit_code", which is pasted
/// as statements and only runs on the failure path.
///
/// @param[out]  buf  Buffer to read to, must be at least n bytes long.
/// @param[in]  n  Amount of bytes to read.
/// @param  fd  FILE* to read from.
/// @param  exit_code  Code to run before returning.
///
/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
///         there are not enough bytes, returns SP_OTHERERROR if reading failed.
#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
  do { \
    const size_t n__SPRB = (n); \
    FILE *const fd__SPRB = (fd); \
    char *const buf__SPRB = (buf); \
    const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \
    if (read_bytes__SPRB != n__SPRB) { \
      exit_code; \
      return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \
    } \
  } while (0)

/// Like #SPELL_READ_BYTES, but also error out if NUL byte was read
///
/// Like #SPELL_READ_BYTES this returns from the enclosing function on
/// failure.  "exit_code" is run once on whichever failure path is taken.
///
/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if
///         there are not enough bytes, returns SP_OTHERERROR if reading failed,
///         returns SP_FORMERROR if read out a NUL byte.
#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
  do { \
    const size_t n__SPRNB = (n); \
    FILE *const fd__SPRNB = (fd); \
    char *const buf__SPRNB = (buf); \
    SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \
    if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \
      exit_code; \
      return SP_FORMERROR; \
    } \
  } while (0)

/// Check that spell file starts with a magic string
///
/// Reads VIMSPELLMAGICL bytes from "fd" and compares them with VIMSPELLMAGIC.
/// Does not check for version of the file.
///
/// @param  fd  File to check.
///
/// @return 0 in case of success, SP_TRUNCERROR if file contains not enough
///         bytes, SP_FORMERROR if it does not match magic string and
///         SP_OTHERERROR if reading file failed.
static inline int spell_check_magic_string(FILE *const fd)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  char buf[VIMSPELLMAGICL];
  // The "exit_code" argument is just an empty statement: nothing to clean up.
  SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd,; );
  if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
    return SP_FORMERROR;
  }
  return 0;
}

/// Load one spell file and store the info into a slang_T.
///
/// This is invoked in three ways:
/// - From spell_load_cb() to load a spell file for the first time.  "lang" is
///   the language name, "old_lp" is NULL.  Will allocate an slang_T.
/// - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
///   points to the existing slang_T.
/// - Just after writing a .spl file; it's read back to produce the .sug file.
///   "old_lp" is NULL and "lang" is NULL.  Will allocate an slang_T.
601 /// 602 /// @param silent no error if file doesn't exist 603 /// 604 /// @return the slang_T the spell file was loaded into. NULL for error. 605 slang_T *spell_load_file(char *fname, char *lang, slang_T *old_lp, bool silent) 606 { 607 char *p; 608 slang_T *lp = NULL; 609 int res; 610 bool did_estack_push = false; 611 ESTACK_CHECK_DECLARATION; 612 613 FILE *fd = os_fopen(fname, "r"); 614 if (fd == NULL) { 615 if (!silent) { 616 semsg(_(e_notopen), fname); 617 } else if (p_verbose > 2) { 618 verbose_enter(); 619 smsg(0, e_notopen, fname); 620 verbose_leave(); 621 } 622 goto endFAIL; 623 } 624 if (p_verbose > 2) { 625 verbose_enter(); 626 smsg(0, _("Reading spell file \"%s\""), fname); 627 verbose_leave(); 628 } 629 630 if (old_lp == NULL) { 631 lp = slang_alloc(lang); 632 633 // Remember the file name, used to reload the file when it's updated. 634 lp->sl_fname = xstrdup(fname); 635 636 // Check for .add.spl. 637 lp->sl_add = strstr(path_tail(fname), SPL_FNAME_ADD) != NULL; 638 } else { 639 lp = old_lp; 640 } 641 642 // Set sourcing_name, so that error messages mention the file name. 643 estack_push(ETYPE_SPELL, fname, 0); 644 ESTACK_CHECK_SETUP; 645 did_estack_push = true; 646 647 // <HEADER>: <fileID> 648 const int scms_ret = spell_check_magic_string(fd); 649 switch (scms_ret) { 650 case SP_FORMERROR: 651 case SP_TRUNCERROR: 652 semsg("%s", _("E757: This does not look like a spell file")); 653 goto endFAIL; 654 case SP_OTHERERROR: 655 semsg(_("E5042: Failed to read spell file %s: %s"), 656 fname, strerror(ferror(fd))); 657 goto endFAIL; 658 case 0: 659 break; 660 } 661 int c = getc(fd); // <versionnr> 662 if (c < VIMSPELLVERSION) { 663 emsg(_("E771: Old spell file, needs to be updated")); 664 goto endFAIL; 665 } else if (c > VIMSPELLVERSION) { 666 emsg(_("E772: Spell file is for newer version of Vim")); 667 goto endFAIL; 668 } 669 670 // <SECTIONS>: <section> ... 
<sectionend> 671 // <section>: <sectionID> <sectionflags> <sectionlen> (section contents) 672 while (true) { 673 int n = getc(fd); // <sectionID> or <sectionend> 674 if (n == SN_END) { 675 break; 676 } 677 c = getc(fd); // <sectionflags> 678 int len = get4c(fd); // <sectionlen> 679 if (len < 0) { 680 goto truncerr; 681 } 682 683 res = 0; 684 switch (n) { 685 case SN_INFO: 686 XFREE_CLEAR(lp->sl_info); 687 lp->sl_info = read_string(fd, (size_t)len); // <infotext> 688 if (lp->sl_info == NULL) { 689 goto endFAIL; 690 } 691 break; 692 693 case SN_REGION: 694 res = read_region_section(fd, lp, len); 695 break; 696 697 case SN_CHARFLAGS: 698 res = read_charflags_section(fd); 699 break; 700 701 case SN_MIDWORD: 702 lp->sl_midword = read_string(fd, (size_t)len); // <midword> 703 if (lp->sl_midword == NULL) { 704 goto endFAIL; 705 } 706 break; 707 708 case SN_PREFCOND: 709 res = read_prefcond_section(fd, lp); 710 break; 711 712 case SN_REP: 713 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); 714 break; 715 716 case SN_REPSAL: 717 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); 718 break; 719 720 case SN_SAL: 721 res = read_sal_section(fd, lp); 722 break; 723 724 case SN_SOFO: 725 res = read_sofo_section(fd, lp); 726 break; 727 728 case SN_MAP: 729 p = read_string(fd, (size_t)len); // <mapstr> 730 if (p == NULL) { 731 goto endFAIL; 732 } 733 set_map_str(lp, p); 734 xfree(p); 735 break; 736 737 case SN_WORDS: 738 res = read_words_section(fd, lp, len); 739 break; 740 741 case SN_SUGFILE: 742 lp->sl_sugtime = get8ctime(fd); // <timestamp> 743 break; 744 745 case SN_NOSPLITSUGS: 746 lp->sl_nosplitsugs = true; 747 break; 748 749 case SN_NOCOMPOUNDSUGS: 750 lp->sl_nocompoundsugs = true; 751 break; 752 753 case SN_COMPOUND: 754 res = read_compound(fd, lp, len); 755 break; 756 757 case SN_NOBREAK: 758 lp->sl_nobreak = true; 759 break; 760 761 case SN_SYLLABLE: 762 lp->sl_syllable = read_string(fd, (size_t)len); // <syllable> 763 if (lp->sl_syllable == 
NULL) { 764 goto endFAIL; 765 } 766 if (init_syl_tab(lp) != OK) { 767 goto endFAIL; 768 } 769 break; 770 771 default: 772 // Unsupported section. When it's required give an error 773 // message. When it's not required skip the contents. 774 if (c & SNF_REQUIRED) { 775 emsg(_("E770: Unsupported section in spell file")); 776 goto endFAIL; 777 } 778 while (--len >= 0) { 779 if (getc(fd) < 0) { 780 goto truncerr; 781 } 782 } 783 break; 784 } 785 someerror: 786 if (res == SP_FORMERROR) { 787 emsg(_(e_format)); 788 goto endFAIL; 789 } 790 if (res == SP_TRUNCERROR) { 791 truncerr: 792 emsg(_(e_spell_trunc)); 793 goto endFAIL; 794 } 795 if (res == SP_OTHERERROR) { 796 goto endFAIL; 797 } 798 } 799 800 // <LWORDTREE> 801 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fbyts_len, 802 &lp->sl_fidxs, false, 0); 803 if (res != 0) { 804 goto someerror; 805 } 806 807 // <KWORDTREE> 808 res = spell_read_tree(fd, &lp->sl_kbyts, NULL, &lp->sl_kidxs, false, 0); 809 if (res != 0) { 810 goto someerror; 811 } 812 813 // <PREFIXTREE> 814 res = spell_read_tree(fd, &lp->sl_pbyts, NULL, &lp->sl_pidxs, true, 815 lp->sl_prefixcnt); 816 if (res != 0) { 817 goto someerror; 818 } 819 820 // For a new file link it in the list of spell files. 821 if (old_lp == NULL && lang != NULL) { 822 lp->sl_next = first_lang; 823 first_lang = lp; 824 } 825 826 goto endOK; 827 828 endFAIL: 829 if (lang != NULL) { 830 // truncating the name signals the error to spell_load_lang() 831 *lang = NUL; 832 } 833 if (lp != NULL && old_lp == NULL) { 834 slang_free(lp); 835 } 836 lp = NULL; 837 838 endOK: 839 if (fd != NULL) { 840 fclose(fd); 841 } 842 if (did_estack_push) { 843 ESTACK_CHECK_NOW; 844 estack_pop(); 845 } 846 847 return lp; 848 } 849 850 // Fill in the wordcount fields for a trie. 851 // Returns the total number of words. 
852 static void tree_count_words(const uint8_t *byts, idx_T *idxs) 853 { 854 idx_T arridx[MAXWLEN]; 855 int curi[MAXWLEN]; 856 int wordcount[MAXWLEN]; 857 858 arridx[0] = 0; 859 curi[0] = 1; 860 wordcount[0] = 0; 861 int depth = 0; 862 while (depth >= 0 && !got_int) { 863 if (curi[depth] > byts[arridx[depth]]) { 864 // Done all bytes at this node, go up one level. 865 idxs[arridx[depth]] = wordcount[depth]; 866 if (depth > 0) { 867 wordcount[depth - 1] += wordcount[depth]; 868 } 869 870 depth--; 871 fast_breakcheck(); 872 } else { 873 // Do one more byte at this node. 874 idx_T n = arridx[depth] + curi[depth]; 875 curi[depth]++; 876 877 int c = byts[n]; 878 if (c == 0) { 879 // End of word, count it. 880 wordcount[depth]++; 881 882 // Skip over any other NUL bytes (same word with different 883 // flags). 884 while (byts[n + 1] == 0) { 885 n++; 886 curi[depth]++; 887 } 888 } else { 889 // Normal char, go one level deeper to count the words. 890 depth++; 891 arridx[depth] = idxs[n]; 892 curi[depth] = 1; 893 wordcount[depth] = 0; 894 } 895 } 896 } 897 } 898 899 /// Load the .sug files for languages that have one and weren't loaded yet. 900 void suggest_load_files(void) 901 { 902 char buf[MAXWLEN]; 903 garray_T ga; 904 905 // Do this for all languages that support sound folding. 906 for (int lpi = 0; lpi < curwin->w_s->b_langp.ga_len; lpi++) { 907 langp_T *lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi); 908 slang_T *slang = lp->lp_slang; 909 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) { 910 // Change ".spl" to ".sug" and open the file. When the file isn't 911 // found silently skip it. Do set "sl_sugloaded" so that we 912 // don't try again and again. 
913 slang->sl_sugloaded = true; 914 915 char *dotp = strrchr(slang->sl_fname, '.'); 916 if (dotp == NULL || path_fnamecmp(dotp, ".spl") != 0) { 917 continue; 918 } 919 STRCPY(dotp, ".sug"); 920 FILE *fd = os_fopen(slang->sl_fname, "r"); 921 if (fd == NULL) { 922 goto nextone; 923 } 924 925 // <SUGHEADER>: <fileID> <versionnr> <timestamp> 926 for (int i = 0; i < VIMSUGMAGICL; i++) { 927 buf[i] = (char)getc(fd); // <fileID> 928 } 929 if (strncmp(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) { 930 semsg(_("E778: This does not look like a .sug file: %s"), 931 slang->sl_fname); 932 goto nextone; 933 } 934 int c = getc(fd); // <versionnr> 935 if (c < VIMSUGVERSION) { 936 semsg(_("E779: Old .sug file, needs to be updated: %s"), 937 slang->sl_fname); 938 goto nextone; 939 } else if (c > VIMSUGVERSION) { 940 semsg(_("E780: .sug file is for newer version of Vim: %s"), 941 slang->sl_fname); 942 goto nextone; 943 } 944 945 // Check the timestamp, it must be exactly the same as the one in 946 // the .spl file. Otherwise the word numbers won't match. 947 time_t timestamp = get8ctime(fd); // <timestamp> 948 if (timestamp != slang->sl_sugtime) { 949 semsg(_("E781: .sug file doesn't match .spl file: %s"), 950 slang->sl_fname); 951 goto nextone; 952 } 953 954 // <SUGWORDTREE>: <wordtree> 955 // Read the trie with the soundfolded words. 956 if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs, 957 false, 0) != 0) { 958 someerror: 959 semsg(_(e_error_while_reading_sug_file_str), 960 slang->sl_fname); 961 slang_clear_sug(slang); 962 goto nextone; 963 } 964 965 // <SUGTABLE>: <sugwcount> <sugline> ... 966 // 967 // Read the table with word numbers. We use a file buffer for 968 // this, because it's so much like a file with lines. Makes it 969 // possible to swap the info and save on memory use. 
970 slang->sl_sugbuf = open_spellbuf(); 971 972 // <sugwcount> 973 int wcount = get4c(fd); 974 if (wcount < 0) { 975 goto someerror; 976 } 977 978 // Read all the wordnr lists into the buffer, one NUL terminated 979 // list per line. 980 ga_init(&ga, 1, 100); 981 for (int wordnr = 0; wordnr < wcount; wordnr++) { 982 ga.ga_len = 0; 983 while (true) { 984 c = getc(fd); // <sugline> 985 if (c < 0) { 986 goto someerror; 987 } 988 GA_APPEND(uint8_t, &ga, (uint8_t)c); 989 if (c == NUL) { 990 break; 991 } 992 } 993 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, 994 ga.ga_data, ga.ga_len, true) == FAIL) { 995 goto someerror; 996 } 997 } 998 ga_clear(&ga); 999 1000 // Need to put word counts in the word tries, so that we can find 1001 // a word by its number. 1002 tree_count_words(slang->sl_fbyts, slang->sl_fidxs); 1003 tree_count_words(slang->sl_sbyts, slang->sl_sidxs); 1004 1005 nextone: 1006 if (fd != NULL) { 1007 fclose(fd); 1008 } 1009 STRCPY(dotp, ".spl"); 1010 } 1011 } 1012 } 1013 1014 // Read a length field from "fd" in "cnt_bytes" bytes. 1015 // Allocate memory, read the string into it and add a NUL at the end. 1016 // Returns NULL when the count is zero. 1017 // Sets "*cntp" to SP_*ERROR when there is an error, length of the result 1018 // otherwise. 1019 static char *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) 1020 { 1021 int cnt = 0; 1022 1023 // read the length bytes, MSB first 1024 for (int i = 0; i < cnt_bytes; i++) { 1025 const int c = getc(fd); 1026 1027 if (c == EOF) { 1028 *cntp = SP_TRUNCERROR; 1029 return NULL; 1030 } 1031 cnt = (int)(((unsigned)cnt << 8) + (unsigned)c); 1032 } 1033 *cntp = cnt; 1034 if (cnt == 0) { 1035 return NULL; // nothing to read, return NULL 1036 } 1037 char *str = read_string(fd, (size_t)cnt); 1038 if (str == NULL) { 1039 *cntp = SP_OTHERERROR; 1040 } 1041 return str; 1042 } 1043 1044 // Read SN_REGION: <regionname> ... 1045 // Return SP_*ERROR flags. 
1046 static int read_region_section(FILE *fd, slang_T *lp, int len) 1047 { 1048 if (len > MAXREGIONS * 2) { 1049 return SP_FORMERROR; 1050 } 1051 SPELL_READ_NONNUL_BYTES(lp->sl_regions, (size_t)len, fd,; ); 1052 lp->sl_regions[len] = NUL; 1053 return 0; 1054 } 1055 1056 // Read SN_CHARFLAGS section: <charflagslen> <charflags> 1057 // <folcharslen> <folchars> 1058 // Return SP_*ERROR flags. 1059 static int read_charflags_section(FILE *fd) 1060 { 1061 int flagslen, follen; 1062 1063 // <charflagslen> <charflags> 1064 char *flags = read_cnt_string(fd, 1, &flagslen); 1065 if (flagslen < 0) { 1066 return flagslen; 1067 } 1068 1069 // <folcharslen> <folchars> 1070 char *fol = read_cnt_string(fd, 2, &follen); 1071 if (follen < 0) { 1072 xfree(flags); 1073 return follen; 1074 } 1075 1076 // Set the word-char flags and fill SPELL_ISUPPER() table. 1077 if (flags != NULL && fol != NULL) { 1078 set_spell_charflags(flags, flagslen, fol); 1079 } 1080 1081 xfree(flags); 1082 xfree(fol); 1083 1084 // When <charflagslen> is zero then <fcharlen> must also be zero. 1085 if ((flags == NULL) != (fol == NULL)) { 1086 return SP_FORMERROR; 1087 } 1088 return 0; 1089 } 1090 1091 // Read SN_PREFCOND section. 1092 // Return SP_*ERROR flags. 1093 static int read_prefcond_section(FILE *fd, slang_T *lp) 1094 { 1095 // <prefcondcnt> <prefcond> ... 1096 const int cnt = get2c(fd); // <prefcondcnt> 1097 if (cnt <= 0) { 1098 return SP_FORMERROR; 1099 } 1100 1101 lp->sl_prefprog = xcalloc((size_t)cnt, sizeof(regprog_T *)); 1102 lp->sl_prefixcnt = cnt; 1103 1104 for (int i = 0; i < cnt; i++) { 1105 // <prefcond> : <condlen> <condstr> 1106 const int n = getc(fd); // <condlen> 1107 if (n < 0 || n >= MAXWLEN) { 1108 return SP_FORMERROR; 1109 } 1110 1111 // When <condlen> is zero we have an empty condition. Otherwise 1112 // compile the regexp program used to check for the condition. 
1113 if (n > 0) { 1114 char buf[MAXWLEN + 1]; 1115 buf[0] = '^'; // always match at one position only 1116 SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd,; ); 1117 buf[n + 1] = NUL; 1118 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC | RE_STRING); 1119 } 1120 } 1121 return 0; 1122 } 1123 1124 // Read REP or REPSAL items section from "fd": <repcount> <rep> ... 1125 // Return SP_*ERROR flags. 1126 static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first) 1127 { 1128 fromto_T *ftp; 1129 1130 int cnt = get2c(fd); // <repcount> 1131 if (cnt < 0) { 1132 return SP_TRUNCERROR; 1133 } 1134 1135 ga_grow(gap, cnt); 1136 1137 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 1138 for (; gap->ga_len < cnt; gap->ga_len++) { 1139 int c; 1140 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; 1141 ftp->ft_from = read_cnt_string(fd, 1, &c); 1142 if (c < 0) { 1143 return c; 1144 } 1145 if (c == 0) { 1146 return SP_FORMERROR; 1147 } 1148 ftp->ft_to = read_cnt_string(fd, 1, &c); 1149 if (c <= 0) { 1150 xfree(ftp->ft_from); 1151 if (c < 0) { 1152 return c; 1153 } 1154 return SP_FORMERROR; 1155 } 1156 } 1157 1158 // Fill the first-index table. 1159 for (int i = 0; i < 256; i++) { 1160 first[i] = -1; 1161 } 1162 for (int i = 0; i < gap->ga_len; i++) { 1163 ftp = &((fromto_T *)gap->ga_data)[i]; 1164 if (first[(uint8_t)(*ftp->ft_from)] == -1) { 1165 first[(uint8_t)(*ftp->ft_from)] = (int16_t)i; 1166 } 1167 } 1168 return 0; 1169 } 1170 1171 // Read SN_SAL section: <salflags> <salcount> <sal> ... 1172 // Return SP_*ERROR flags. 
static int read_sal_section(FILE *fd, slang_T *slang)
{
  slang->sl_sofo = false;

  const int flags = getc(fd);  // <salflags>
  if (flags & SAL_F0LLOWUP) {
    slang->sl_followup = true;
  }
  if (flags & SAL_COLLAPSE) {
    slang->sl_collapse = true;
  }
  if (flags & SAL_REM_ACCENTS) {
    slang->sl_rem_accents = true;
  }

  int cnt = get2c(fd);  // <salcount>
  if (cnt < 0) {
    return SP_TRUNCERROR;
  }

  // Make room for one more item than <salcount>: the end marker that is
  // added below.
  garray_T *gap = &slang->sl_sal;
  ga_init(gap, sizeof(salitem_T), 10);
  ga_grow(gap, cnt + 1);

  // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
  for (; gap->ga_len < cnt; gap->ga_len++) {
    int c = NUL;

    salitem_T *smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    int ccnt = getc(fd);  // <salfromlen>
    if (ccnt < 0) {
      return SP_TRUNCERROR;
    }
    // One allocation holds sm_lead, sm_oneof and sm_rules back to back,
    // each NUL terminated; "p" walks through it.
    char *p = xmalloc((size_t)ccnt + 2);
    smp->sm_lead = p;

    // Read up to the first special char into sm_lead.
    int i = 0;
    for (; i < ccnt; i++) {
      c = getc(fd);  // <salfrom>
      if (vim_strchr("0123456789(-<^$", c) != NULL) {
        break;
      }
      *p++ = (char)(uint8_t)c;
    }
    smp->sm_leadlen = (int)(p - smp->sm_lead);
    *p++ = NUL;

    // Put (abc) chars in sm_oneof, if any.
    if (c == '(') {
      smp->sm_oneof = p;
      for (++i; i < ccnt; i++) {
        c = getc(fd);  // <salfrom>
        if (c == ')') {
          break;
        }
        *p++ = (char)(uint8_t)c;
      }
      *p++ = NUL;
      // Fetch the char following the ')', it is stored in sm_rules below.
      if (++i < ccnt) {
        c = getc(fd);
      }
    } else {
      smp->sm_oneof = NULL;
    }

    // Any following chars go in sm_rules.
    smp->sm_rules = p;
    if (i < ccnt) {
      // store the char we got while checking for end of sm_lead
      *p++ = (char)(uint8_t)c;
    }
    i++;
    if (i < ccnt) {
      SPELL_READ_NONNUL_BYTES(  // <salfrom>
                              p, (size_t)(ccnt - i), fd,
                              xfree(smp->sm_lead));
      p += (ccnt - i);
    }
    *p++ = NUL;

    // <saltolen> <salto>
    smp->sm_to = read_cnt_string(fd, 1, &ccnt);
    if (ccnt < 0) {
      xfree(smp->sm_lead);
      return ccnt;
    }

    // convert the multi-byte strings to wide char strings
    // (sm_leadlen is recomputed here as a character count)
    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
    smp->sm_leadlen = mb_charlen(smp->sm_lead);
    if (smp->sm_oneof == NULL) {
      smp->sm_oneof_w = NULL;
    } else {
      smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
    }
    if (smp->sm_to == NULL) {
      smp->sm_to_w = NULL;
    } else {
      smp->sm_to_w = mb_str2wide(smp->sm_to);
    }
  }

  if (!GA_EMPTY(gap)) {
    // Add one extra entry to mark the end with an empty sm_lead.  Avoids
    // that we need to check the index every time.
    salitem_T *smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
    char *p = xmalloc(1);
    p[0] = NUL;
    smp->sm_lead = p;
    smp->sm_lead_w = mb_str2wide(smp->sm_lead);
    smp->sm_leadlen = 0;
    smp->sm_oneof = NULL;
    smp->sm_oneof_w = NULL;
    smp->sm_rules = p;
    smp->sm_to = NULL;
    smp->sm_to_w = NULL;
    gap->ga_len++;
  }

  // Fill the first-index table.
  set_sal_first(slang);

  return 0;
}

// Read SN_WORDS: <word> ...
// "len" is the section length in bytes; NUL terminated words are read until
// that many bytes were consumed and each one is passed to
// count_common_word().
// Return SP_*ERROR flags.
static int read_words_section(FILE *fd, slang_T *lp, int len)
{
  int done = 0;
  int i;
  uint8_t word[MAXWLEN];

  while (done < len) {
    // Read one word at a time.
    for (i = 0;; i++) {
      int c = getc(fd);
      if (c == EOF) {
        return SP_TRUNCERROR;
      }
      word[i] = (uint8_t)c;
      if (word[i] == NUL) {
        break;
      }
      // The last byte of "word" must be the NUL, otherwise the word is
      // too long.
      if (i == MAXWLEN - 1) {
        return SP_FORMERROR;
      }
    }

    // Init the count to 10.
    count_common_word(lp, (char *)word, -1, 10);
    done += i + 1;
  }
  return 0;
}

// SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
// Sets "slang->sl_sofo" and, when both strings are present, passes them to
// set_sofo().
// Return SP_*ERROR flags.
static int read_sofo_section(FILE *fd, slang_T *slang)
{
  int cnt;
  int res;

  slang->sl_sofo = true;

  // <sofofromlen> <sofofrom>
  char *from = read_cnt_string(fd, 2, &cnt);
  if (cnt < 0) {
    return cnt;
  }

  // <sofotolen> <sofoto>
  char *to = read_cnt_string(fd, 2, &cnt);
  if (cnt < 0) {
    xfree(from);
    return cnt;
  }

  // Store the info in slang->sl_sal and/or slang->sl_sal_first.
  if (from != NULL && to != NULL) {
    res = set_sofo(slang, from, to);
  } else if (from != NULL || to != NULL) {
    res = SP_FORMERROR;  // only one of two strings is an error
  } else {
    res = 0;
  }

  xfree(from);
  xfree(to);
  return res;
}

// Read the compound section from the .spl file:
//      <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
// Returns SP_*ERROR flags.
1369 static int read_compound(FILE *fd, slang_T *slang, int len) 1370 { 1371 int todo = len; 1372 int cnt; 1373 1374 if (todo < 2) { 1375 return SP_FORMERROR; // need at least two bytes 1376 } 1377 todo--; 1378 int c = getc(fd); // <compmax> 1379 if (c < 2) { 1380 c = MAXWLEN; 1381 } 1382 slang->sl_compmax = c; 1383 1384 todo--; 1385 c = getc(fd); // <compminlen> 1386 if (c < 1) { 1387 c = 0; 1388 } 1389 slang->sl_compminlen = c; 1390 1391 todo--; 1392 c = getc(fd); // <compsylmax> 1393 if (c < 1) { 1394 c = MAXWLEN; 1395 } 1396 slang->sl_compsylmax = c; 1397 1398 c = getc(fd); // <compoptions> 1399 if (c != 0) { 1400 ungetc(c, fd); // be backwards compatible with Vim 7.0b 1401 } else { 1402 todo--; 1403 c = getc(fd); // only use the lower byte for now 1404 todo--; 1405 slang->sl_compoptions = c; 1406 1407 garray_T *gap = &slang->sl_comppat; 1408 c = get2c(fd); // <comppatcount> 1409 if (c < 0) { 1410 return SP_TRUNCERROR; 1411 } 1412 todo -= 2; 1413 ga_init(gap, sizeof(char *), c); 1414 ga_grow(gap, c); 1415 while (--c >= 0) { 1416 ((char **)(gap->ga_data))[gap->ga_len++] = read_cnt_string(fd, 1, &cnt); 1417 // <comppatlen> <comppattext> 1418 if (cnt < 0) { 1419 return cnt; 1420 } 1421 todo -= cnt + 1; 1422 } 1423 } 1424 if (todo < 0) { 1425 return SP_FORMERROR; 1426 } 1427 1428 // Turn the COMPOUNDRULE items into a regexp pattern: 1429 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". 1430 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. 1431 // Conversion to utf-8 may double the size. 1432 c = todo * 2 + 7; 1433 c += todo * 2; 1434 char *pat = xmalloc((size_t)c); 1435 1436 // We also need a list of all flags that can appear at the start and one 1437 // for all flags. 
1438 uint8_t *cp = xmalloc((size_t)todo + 1); 1439 slang->sl_compstartflags = cp; 1440 *cp = NUL; 1441 1442 uint8_t *ap = xmalloc((size_t)todo + 1); 1443 slang->sl_compallflags = ap; 1444 *ap = NUL; 1445 1446 // And a list of all patterns in their original form, for checking whether 1447 // compounding may work in match_compoundrule(). This is freed when we 1448 // encounter a wildcard, the check doesn't work then. 1449 uint8_t *crp = xmalloc((size_t)todo + 1); 1450 slang->sl_comprules = crp; 1451 1452 char *pp = pat; 1453 *pp++ = '^'; 1454 *pp++ = '\\'; 1455 *pp++ = '('; 1456 1457 int atstart = 1; 1458 while (todo-- > 0) { 1459 c = getc(fd); // <compflags> 1460 if (c == EOF) { 1461 xfree(pat); 1462 return SP_TRUNCERROR; 1463 } 1464 1465 // Add all flags to "sl_compallflags". 1466 if (vim_strchr("?*+[]/", c) == NULL 1467 && !byte_in_str(slang->sl_compallflags, c)) { 1468 *ap++ = (uint8_t)c; 1469 *ap = NUL; 1470 } 1471 1472 if (atstart != 0) { 1473 // At start of item: copy flags to "sl_compstartflags". For a 1474 // [abc] item set "atstart" to 2 and copy up to the ']'. 1475 if (c == '[') { 1476 atstart = 2; 1477 } else if (c == ']') { 1478 atstart = 0; 1479 } else { 1480 if (!byte_in_str(slang->sl_compstartflags, c)) { 1481 *cp++ = (uint8_t)c; 1482 *cp = NUL; 1483 } 1484 if (atstart == 1) { 1485 atstart = 0; 1486 } 1487 } 1488 } 1489 1490 // Copy flag to "sl_comprules", unless we run into a wildcard. 1491 if (crp != NULL) { 1492 if (c == '?' || c == '+' || c == '*') { 1493 XFREE_CLEAR(slang->sl_comprules); 1494 crp = NULL; 1495 } else { 1496 *crp++ = (uint8_t)c; 1497 } 1498 } 1499 1500 if (c == '/') { // slash separates two items 1501 *pp++ = '\\'; 1502 *pp++ = '|'; 1503 atstart = 1; 1504 } else { // normal char, "[abc]" and '*' are copied as-is 1505 if (c == '?' || c == '+' || c == '~') { 1506 *pp++ = '\\'; // "a?" 
becomes "a\?", "a+" becomes "a\+" 1507 } 1508 pp += utf_char2bytes(c, pp); 1509 } 1510 } 1511 1512 *pp++ = '\\'; 1513 *pp++ = ')'; 1514 *pp++ = '$'; 1515 *pp = NUL; 1516 1517 if (crp != NULL) { 1518 *crp = NUL; 1519 } 1520 1521 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); 1522 xfree(pat); 1523 if (slang->sl_compprog == NULL) { 1524 return SP_FORMERROR; 1525 } 1526 1527 return 0; 1528 } 1529 1530 // Set the SOFOFROM and SOFOTO items in language "lp". 1531 // Returns SP_*ERROR flags when there is something wrong. 1532 static int set_sofo(slang_T *lp, const char *from, const char *to) 1533 { 1534 const char *s; 1535 const char *p; 1536 1537 // Use "sl_sal" as an array with 256 pointers to a list of wide 1538 // characters. The index is the low byte of the character. 1539 // The list contains from-to pairs with a terminating NUL. 1540 // sl_sal_first[] is used for latin1 "from" characters. 1541 garray_T *gap = &lp->sl_sal; 1542 ga_init(gap, sizeof(int *), 1); 1543 ga_grow(gap, 256); 1544 memset(gap->ga_data, 0, sizeof(int *) * 256); 1545 gap->ga_len = 256; 1546 1547 // First count the number of items for each list. Temporarily use 1548 // sl_sal_first[] for this. 1549 for (p = from, s = to; *p != NUL && *s != NUL;) { 1550 const int c = mb_cptr2char_adv(&p); 1551 s += utf_ptr2len(s); 1552 if (c >= 256) { 1553 lp->sl_sal_first[c & 0xff]++; 1554 } 1555 } 1556 if (*p != NUL || *s != NUL) { // lengths differ 1557 return SP_FORMERROR; 1558 } 1559 1560 // Allocate the lists. 1561 for (int i = 0; i < 256; i++) { 1562 if (lp->sl_sal_first[i] > 0) { 1563 p = xmalloc(sizeof(int) * (size_t)(lp->sl_sal_first[i] * 2 + 1)); 1564 ((int **)gap->ga_data)[i] = (int *)p; 1565 *(int *)p = 0; 1566 } 1567 } 1568 1569 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal 1570 // list. 
1571 memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); 1572 for (p = from, s = to; *p != NUL && *s != NUL;) { 1573 const int c = mb_cptr2char_adv(&p); 1574 const int i = mb_cptr2char_adv(&s); 1575 if (c >= 256) { 1576 // Append the from-to chars at the end of the list with 1577 // the low byte. 1578 int *inp = ((int **)gap->ga_data)[c & 0xff]; 1579 while (*inp != 0) { 1580 inp++; 1581 } 1582 *inp++ = c; // from char 1583 *inp++ = i; // to char 1584 *inp++ = NUL; // NUL at the end 1585 } else { 1586 // mapping byte to char is done in sl_sal_first[] 1587 lp->sl_sal_first[c] = i; 1588 } 1589 } 1590 1591 return 0; 1592 } 1593 1594 // Fill the first-index table for "lp". 1595 static void set_sal_first(slang_T *lp) 1596 { 1597 garray_T *gap = &lp->sl_sal; 1598 1599 salfirst_T *sfirst = lp->sl_sal_first; 1600 for (int i = 0; i < 256; i++) { 1601 sfirst[i] = -1; 1602 } 1603 salitem_T *smp = (salitem_T *)gap->ga_data; 1604 for (int i = 0; i < gap->ga_len; i++) { 1605 // Use the lowest byte of the first character. For latin1 it's 1606 // the character, for other encodings it should differ for most 1607 // characters. 1608 int c = *smp[i].sm_lead_w & 0xff; 1609 if (sfirst[c] == -1) { 1610 sfirst[c] = i; 1611 1612 // Make sure all entries with this byte are following each 1613 // other. Move the ones that are in the wrong position. Do 1614 // keep the same ordering! 1615 while (i + 1 < gap->ga_len 1616 && (*smp[i + 1].sm_lead_w & 0xff) == c) { 1617 // Skip over entry with same index byte. 1618 i++; 1619 } 1620 1621 for (int n = 1; i + n < gap->ga_len; n++) { 1622 if ((*smp[i + n].sm_lead_w & 0xff) == c) { 1623 salitem_T tsal; 1624 1625 // Move entry with same index byte after the entries 1626 // we already found. 1627 i++; 1628 n--; 1629 tsal = smp[i + n]; 1630 memmove(smp + i + 1, smp + i, sizeof(salitem_T) * (size_t)n); 1631 smp[i] = tsal; 1632 } 1633 } 1634 } 1635 } 1636 } 1637 1638 // Turn a multi-byte string into a wide character string. 
1639 // Return it in allocated memory. 1640 static int *mb_str2wide(const char *s) 1641 { 1642 int i = 0; 1643 1644 int *res = xmalloc(((size_t)mb_charlen(s) + 1) * sizeof(int)); 1645 for (const char *p = s; *p != NUL;) { 1646 res[i++] = mb_ptr2char_adv(&p); 1647 } 1648 res[i] = NUL; 1649 1650 return res; 1651 } 1652 1653 /// Reads a tree from the .spl or .sug file. 1654 /// Allocates the memory and stores pointers in "bytsp" and "idxsp". 1655 /// This is skipped when the tree has zero length. 1656 /// 1657 /// @param prefixtree true for the prefix tree 1658 /// @param prefixcnt when "prefixtree" is true: prefix count 1659 /// 1660 /// @return zero when OK, SP_ value for an error. 1661 static int spell_read_tree(FILE *fd, uint8_t **bytsp, int *bytsp_len, idx_T **idxsp, 1662 bool prefixtree, int prefixcnt) 1663 FUNC_ATTR_NONNULL_ARG(1, 2, 4) 1664 { 1665 // The tree size was computed when writing the file, so that we can 1666 // allocate it as one long block. <nodecount> 1667 int len = get4c(fd); 1668 if (len < 0) { 1669 return SP_TRUNCERROR; 1670 } 1671 if ((size_t)len >= SIZE_MAX / sizeof(int)) { 1672 // Invalid length, multiply with sizeof(int) would overflow. 1673 return SP_FORMERROR; 1674 } 1675 if (len <= 0) { 1676 return 0; 1677 } 1678 1679 // Allocate the byte array. 1680 uint8_t *bp = xmalloc((size_t)len); 1681 *bytsp = bp; 1682 if (bytsp_len != NULL) { 1683 *bytsp_len = len; 1684 } 1685 1686 // Allocate the index array. 1687 idx_T *ip = xcalloc((size_t)len, sizeof(*ip)); 1688 *idxsp = ip; 1689 1690 // Recursively read the tree and store it in the array. 1691 int idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); 1692 if (idx < 0) { 1693 return idx; 1694 } 1695 return 0; 1696 } 1697 1698 /// Read one row of siblings from the spell file and store it in the byte array 1699 /// "byts" and index array "idxs". Recursively read the children. 1700 /// 1701 /// NOTE: The code here must match put_node()! 
1702 /// 1703 /// Returns the index (>= 0) following the siblings. 1704 /// Returns SP_TRUNCERROR if the file is shorter than expected. 1705 /// Returns SP_FORMERROR if there is a format error. 1706 /// 1707 /// @param maxidx size of arrays 1708 /// @param startidx current index in "byts" and "idxs" 1709 /// @param prefixtree true for reading PREFIXTREE 1710 /// @param maxprefcondnr maximum for <prefcondnr> 1711 static idx_T read_tree_node(FILE *fd, uint8_t *byts, idx_T *idxs, int maxidx, idx_T startidx, 1712 bool prefixtree, int maxprefcondnr) 1713 { 1714 idx_T idx = startidx; 1715 #define SHARED_MASK 0x8000000 1716 1717 int len = getc(fd); // <siblingcount> 1718 if (len <= 0) { 1719 return SP_TRUNCERROR; 1720 } 1721 1722 if (startidx + len >= maxidx) { 1723 return SP_FORMERROR; 1724 } 1725 byts[idx++] = (uint8_t)len; 1726 1727 // Read the byte values, flag/region bytes and shared indexes. 1728 for (int i = 1; i <= len; i++) { 1729 int c = getc(fd); // <byte> 1730 if (c < 0) { 1731 return SP_TRUNCERROR; 1732 } 1733 if (c <= BY_SPECIAL) { 1734 if (c == BY_NOFLAGS && !prefixtree) { 1735 // No flags, all regions. 1736 idxs[idx] = 0; 1737 } else if (c != BY_INDEX) { 1738 if (prefixtree) { 1739 // Read the optional pflags byte, the prefix ID and the 1740 // condition nr. In idxs[] store the prefix ID in the low 1741 // byte, the condition index shifted up 8 bits, the flags 1742 // shifted up 24 bits. 1743 if (c == BY_FLAGS) { 1744 c = getc(fd) << 24; // <pflags> 1745 } else { 1746 c = 0; 1747 } 1748 1749 c |= getc(fd); // <affixID> 1750 1751 int n = get2c(fd); // <prefcondnr> 1752 if (n >= maxprefcondnr) { 1753 return SP_FORMERROR; 1754 } 1755 c |= (n << 8); 1756 } else { // c must be BY_FLAGS or BY_FLAGS2 1757 // Read flags and optional region and prefix ID. In 1758 // idxs[] the flags go in the low two bytes, region above 1759 // that and prefix ID above the region. 
1760 int c2 = c; 1761 c = getc(fd); // <flags> 1762 if (c2 == BY_FLAGS2) { 1763 c = (getc(fd) << 8) + c; // <flags2> 1764 } 1765 if (c & WF_REGION) { 1766 c = (getc(fd) << 16) + c; // <region> 1767 } 1768 if (c & WF_AFX) { 1769 c = (getc(fd) << 24) + c; // <affixID> 1770 } 1771 } 1772 1773 idxs[idx] = c; 1774 c = 0; 1775 } else { // c == BY_INDEX 1776 // <nodeidx> 1777 int n = get3c(fd); 1778 if (n < 0 || n >= maxidx) { 1779 return SP_FORMERROR; 1780 } 1781 idxs[idx] = n + SHARED_MASK; 1782 c = getc(fd); // <xbyte> 1783 } 1784 } 1785 byts[idx++] = (uint8_t)c; 1786 } 1787 1788 // Recursively read the children for non-shared siblings. 1789 // Skip the end-of-word ones (zero byte value) and the shared ones (and 1790 // remove SHARED_MASK) 1791 for (int i = 1; i <= len; i++) { 1792 if (byts[startidx + i] != 0) { 1793 if (idxs[startidx + i] & SHARED_MASK) { 1794 idxs[startidx + i] &= ~SHARED_MASK; 1795 } else { 1796 idxs[startidx + i] = idx; 1797 idx = read_tree_node(fd, byts, idxs, maxidx, idx, prefixtree, maxprefcondnr); 1798 if (idx < 0) { 1799 break; 1800 } 1801 } 1802 } 1803 } 1804 1805 return idx; 1806 } 1807 1808 /// Reload the spell file "fname" if it's loaded. 1809 /// 1810 /// @param added_word invoked through "zg" 1811 static void spell_reload_one(char *fname, bool added_word) 1812 { 1813 bool didit = false; 1814 1815 for (slang_T *slang = first_lang; slang != NULL; slang = slang->sl_next) { 1816 if (path_full_compare(fname, slang->sl_fname, false, true) == kEqualFiles) { 1817 slang_clear(slang); 1818 if (spell_load_file(fname, NULL, slang, false) == NULL) { 1819 // reloading failed, clear the language 1820 slang_clear(slang); 1821 } 1822 redraw_all_later(UPD_SOME_VALID); 1823 didit = true; 1824 } 1825 } 1826 1827 // When "zg" was used and the file wasn't loaded yet, should redo 1828 // 'spelllang' to load it now. 1829 if (added_word && !didit) { 1830 parse_spelllang(curwin); 1831 } 1832 } 1833 1834 // Functions for ":mkspell". 

// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
#define PFX_FLAGS (-256)

// flags for "condit" argument of store_aff_word()
#define CONDIT_COMB 1  // affix must combine
#define CONDIT_CFIX 2  // affix must have CIRCUMFIX flag
#define CONDIT_SUF 4  // add a suffix for matching flags
#define CONDIT_AFF 8  // word already has an affix

// Tunable parameters for when the tree is compressed.  Filled from the
// 'mkspellmem' option.
static int compress_start = 30000;  // memory / SBLOCKSIZE
static int compress_inc = 100;  // memory / SBLOCKSIZE
static int compress_added = 500000;  // word count

// Check the 'mkspellmem' option.  Return FAIL if it's wrong.
// The value must be three comma separated numbers: "{start},{inc},{added}".
// Sets "compress_start", "compress_inc" and "compress_added" when the value
// is valid; they are left unchanged otherwise.
int spell_check_msm(void)
{
  char *p = p_msm;

  if (!ascii_isdigit(*p)) {
    return FAIL;
  }
  // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)
  int start = (getdigits_int(&p, true, 0) * 10) / (SBLOCKSIZE / 102);
  if (*p != ',') {
    return FAIL;
  }
  p++;
  if (!ascii_isdigit(*p)) {
    return FAIL;
  }
  int incr = (getdigits_int(&p, true, 0) * 102) / (SBLOCKSIZE / 10);
  if (*p != ',') {
    return FAIL;
  }
  p++;
  if (!ascii_isdigit(*p)) {
    return FAIL;
  }
  int added = getdigits_int(&p, true, 0) * 1024;
  if (*p != NUL) {
    return FAIL;
  }

  // None of the values may be zero and the increment must not be larger
  // than the start value.
  if (start == 0 || incr == 0 || added == 0 || incr > start) {
    return FAIL;
  }

  compress_start = start;
  compress_inc = incr;
  compress_added = added;
  return OK;
}

#ifdef SPELL_PRINTTREE
// For debugging the tree code: print the current tree in a (more or less)
// readable format, so that we can see what happens when adding a word and/or
// compressing the tree.
// Based on code from Olaf Seibert.
# define PRINTLINESIZE   1000
# define PRINTWIDTH      6

// Format into line buffer "l" at the column that belongs to "depth".
# define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
                                                       PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, \
                                                       a2)

// The three text rows that together display one row of nodes.
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];

// Reset the "wn_u1.index" marker in "node", its siblings and all their
// children.
static void spell_clear_flags(wordnode_T *node)
{
  wordnode_T *np;

  for (np = node; np != NULL; np = np->wn_sibling) {
    np->wn_u1.index = false;
    spell_clear_flags(np->wn_child);
  }
}

// Print "node" at column "depth", followed by its children and siblings.
// A node that was printed before is shown as a "(nr)" reference only.
static void spell_print_node(wordnode_T *node, int depth)
{
  if (node->wn_u1.index) {
    // Done this node before, print the reference.
    PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
    PRINTSOME(line2, depth, "    ", 0, 0);
    PRINTSOME(line3, depth, "    ", 0, 0);
    msg(line1, 0);
    msg(line2, 0);
    msg(line3, 0);
  } else {
    node->wn_u1.index = true;

    if (node->wn_byte != NUL) {
      if (node->wn_child != NULL) {
        PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
      } else {
        // Cannot happen?
        PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
      }
    } else {
      PRINTSOME(line1, depth, " $    ", 0, 0);
    }

    PRINTSOME(line2, depth, "%d/%d    ", node->wn_nr, node->wn_refs);

    if (node->wn_sibling != NULL) {
      PRINTSOME(line3, depth, " |    ", 0, 0);
    } else {
      PRINTSOME(line3, depth, "      ", 0, 0);
    }

    // An end-of-word node completes the row: output it.
    if (node->wn_byte == NUL) {
      msg(line1, 0);
      msg(line2, 0);
      msg(line3, 0);
    }

    // do the children
    if (node->wn_byte != NUL && node->wn_child != NULL) {
      spell_print_node(node->wn_child, depth + 1);
    }

    // do the siblings
    if (node->wn_sibling != NULL) {
      // get rid of all parent details except |
      STRCPY(line1, line3);
      STRCPY(line2, line3);
      spell_print_node(node->wn_sibling, depth);
    }
  }
}

// Print the whole tree starting at "root"; does nothing when "root" is NULL.
static void spell_print_tree(wordnode_T *root)
{
  if (root == NULL) {
    return;
  }

  // Clear the "wn_u1.index" fields, used to remember what has been done.
  spell_clear_flags(root);

  // Recursively print the tree.
  spell_print_node(root, 0);
}

#endif  // SPELL_PRINTTREE

// Reads the affix file "fname".
// Returns an afffile_T, NULL for complete failure.
1990 static afffile_T *spell_read_aff(spellinfo_T *spin, char *fname) 1991 { 1992 char rline[MAXLINELEN]; 1993 char *line; 1994 char *pc = NULL; 1995 #define MAXITEMCNT 30 1996 char *(items[MAXITEMCNT]); 1997 char *p; 1998 int lnum = 0; 1999 affheader_T *cur_aff = NULL; 2000 bool did_postpone_prefix = false; 2001 int aff_todo = 0; 2002 hashtab_T *tp; 2003 char *low = NULL; 2004 char *fol = NULL; 2005 char *upp = NULL; 2006 bool found_map = false; 2007 hashitem_T *hi; 2008 int compminlen = 0; // COMPOUNDMIN value 2009 int compsylmax = 0; // COMPOUNDSYLMAX value 2010 int compoptions = 0; // COMP_ flags 2011 int compmax = 0; // COMPOUNDWORDMAX value 2012 char *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE 2013 // concatenated 2014 char *midword = NULL; // MIDWORD value 2015 char *syllable = NULL; // SYLLABLE value 2016 char *sofofrom = NULL; // SOFOFROM value 2017 char *sofoto = NULL; // SOFOTO value 2018 2019 // Open the file. 2020 FILE *fd = os_fopen(fname, "r"); 2021 if (fd == NULL) { 2022 semsg(_(e_notopen), fname); 2023 return NULL; 2024 } 2025 2026 vim_snprintf(IObuff, IOSIZE, _("Reading affix file %s..."), fname); 2027 spell_message(spin, IObuff); 2028 2029 // Only do REP lines when not done in another .aff file already. 2030 bool do_rep = GA_EMPTY(&spin->si_rep); 2031 2032 // Only do REPSAL lines when not done in another .aff file already. 2033 bool do_repsal = GA_EMPTY(&spin->si_repsal); 2034 2035 // Only do SAL lines when not done in another .aff file already. 2036 bool do_sal = GA_EMPTY(&spin->si_sal); 2037 2038 // Only do MAP lines when not done in another .aff file already. 2039 bool do_mapline = GA_EMPTY(&spin->si_map); 2040 2041 // Allocate and init the afffile_T structure. 2042 afffile_T *aff = getroom(spin, sizeof(*aff), true); 2043 hash_init(&aff->af_pref); 2044 hash_init(&aff->af_suff); 2045 hash_init(&aff->af_comp); 2046 2047 // Read all the lines in the file one by one. 
2048 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { 2049 line_breakcheck(); 2050 lnum++; 2051 2052 // Skip comment lines. 2053 if (*rline == '#') { 2054 continue; 2055 } 2056 2057 // Convert from "SET" to 'encoding' when needed. 2058 xfree(pc); 2059 if (spin->si_conv.vc_type != CONV_NONE) { 2060 pc = string_convert(&spin->si_conv, rline, NULL); 2061 if (pc == NULL) { 2062 smsg(0, _("Conversion failure for word in %s line %d: %s"), 2063 fname, lnum, rline); 2064 continue; 2065 } 2066 line = pc; 2067 } else { 2068 pc = NULL; 2069 line = rline; 2070 } 2071 2072 // Split the line up in white separated items. Put a NUL after each 2073 // item. 2074 int itemcnt = 0; 2075 for (p = line;;) { 2076 while (*p != NUL && (uint8_t)(*p) <= ' ') { // skip white space and CR/NL 2077 p++; 2078 } 2079 if (*p == NUL) { 2080 break; 2081 } 2082 if (itemcnt == MAXITEMCNT) { // too many items 2083 break; 2084 } 2085 items[itemcnt++] = p; 2086 // A few items have arbitrary text argument, don't split them. 2087 if (itemcnt == 2 && spell_info_item(items[0])) { 2088 while ((uint8_t)(*p) >= ' ' || *p == TAB) { // skip until CR/NL 2089 p++; 2090 } 2091 } else { 2092 while ((uint8_t)(*p) > ' ') { // skip until white space or CR/NL 2093 p++; 2094 } 2095 } 2096 if (*p == NUL) { 2097 break; 2098 } 2099 *p++ = NUL; 2100 } 2101 2102 // Handle non-empty lines. 2103 if (itemcnt > 0) { 2104 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL) { 2105 // Setup for conversion from "ENC" to 'encoding'. 
2106 aff->af_enc = enc_canonize(items[1]); 2107 if (!spin->si_ascii 2108 && convert_setup(&spin->si_conv, aff->af_enc, p_enc) == FAIL) { 2109 smsg(0, _("Conversion in %s not supported: from %s to %s"), 2110 fname, aff->af_enc, p_enc); 2111 } 2112 spin->si_conv.vc_fail = true; 2113 } else if (is_aff_rule(items, itemcnt, "FLAG", 2) 2114 && aff->af_flagtype == AFT_CHAR) { 2115 if (strcmp(items[1], "long") == 0) { 2116 aff->af_flagtype = AFT_LONG; 2117 } else if (strcmp(items[1], "num") == 0) { 2118 aff->af_flagtype = AFT_NUM; 2119 } else if (strcmp(items[1], "caplong") == 0) { 2120 aff->af_flagtype = AFT_CAPLONG; 2121 } else { 2122 smsg(0, _("Invalid value for FLAG in %s line %d: %s"), 2123 fname, lnum, items[1]); 2124 } 2125 if (aff->af_rare != 0 2126 || aff->af_keepcase != 0 2127 || aff->af_bad != 0 2128 || aff->af_needaffix != 0 2129 || aff->af_circumfix != 0 2130 || aff->af_needcomp != 0 2131 || aff->af_comproot != 0 2132 || aff->af_nosuggest != 0 2133 || compflags != NULL 2134 || aff->af_suff.ht_used > 0 2135 || aff->af_pref.ht_used > 0) { 2136 smsg(0, _("FLAG after using flags in %s line %d: %s"), 2137 fname, lnum, items[1]); 2138 } 2139 } else if (spell_info_item(items[0]) && itemcnt > 1) { 2140 p = getroom(spin, 2141 (spin->si_info == NULL ? 
0 : strlen(spin->si_info)) 2142 + strlen(items[0]) 2143 + strlen(items[1]) + 3, false); 2144 if (spin->si_info != NULL) { 2145 STRCPY(p, spin->si_info); 2146 strcat(p, "\n"); 2147 } 2148 strcat(p, items[0]); 2149 strcat(p, " "); 2150 strcat(p, items[1]); 2151 spin->si_info = p; 2152 } else if (is_aff_rule(items, itemcnt, "MIDWORD", 2) && midword == NULL) { 2153 midword = getroom_save(spin, items[1]); 2154 } else if (is_aff_rule(items, itemcnt, "TRY", 2)) { 2155 // ignored, we look in the tree for what chars may appear 2156 } else if ((is_aff_rule(items, itemcnt, "RAR", 2) // TODO(vim): remove "RAR" later 2157 || is_aff_rule(items, itemcnt, "RARE", 2)) 2158 && aff->af_rare == 0) { 2159 aff->af_rare = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2160 } else if ((is_aff_rule(items, itemcnt, "KEP", 2) // TODO(vim): remove "KEP" later 2161 || is_aff_rule(items, itemcnt, "KEEPCASE", 2)) 2162 && aff->af_keepcase == 0) { 2163 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2164 } else if ((is_aff_rule(items, itemcnt, "BAD", 2) 2165 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2)) 2166 && aff->af_bad == 0) { 2167 aff->af_bad = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2168 } else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2) 2169 && aff->af_needaffix == 0) { 2170 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2171 } else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2) 2172 && aff->af_circumfix == 0) { 2173 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2174 } else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2) 2175 && aff->af_nosuggest == 0) { 2176 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2177 } else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2) 2178 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2)) 2179 && aff->af_needcomp == 0) { 2180 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2181 } else 
if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2) 2182 && aff->af_comproot == 0) { 2183 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2184 } else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2) 2185 && aff->af_compforbid == 0) { 2186 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2187 if (aff->af_pref.ht_used > 0) { 2188 smsg(0, 2189 _("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"), 2190 fname, lnum); 2191 } 2192 } else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2) 2193 && aff->af_comppermit == 0) { 2194 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2195 if (aff->af_pref.ht_used > 0) { 2196 smsg(0, 2197 _("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"), 2198 fname, lnum); 2199 } 2200 } else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2) 2201 && compflags == NULL) { 2202 // Turn flag "c" into COMPOUNDRULE compatible string "c+", 2203 // "Na" into "Na+", "1234" into "1234+". 2204 p = getroom(spin, strlen(items[1]) + 2, false); 2205 STRCPY(p, items[1]); 2206 strcat(p, "+"); 2207 compflags = p; 2208 } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2)) { 2209 // We don't use the count, but do check that it's a number and 2210 // not COMPOUNDRULE mistyped. 2211 if (atoi(items[1]) == 0) { 2212 smsg(0, _("Wrong COMPOUNDRULES value in %s line %d: %s"), 2213 fname, lnum, items[1]); 2214 } 2215 } else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2)) { 2216 // Don't use the first rule if it is a number. 2217 if (compflags != NULL || *skipdigits(items[1]) != NUL) { 2218 // Concatenate this string to previously defined ones, 2219 // using a slash to separate them. 
2220 int l = (int)strlen(items[1]) + 1; 2221 if (compflags != NULL) { 2222 l += (int)strlen(compflags) + 1; 2223 } 2224 p = getroom(spin, (size_t)l, false); 2225 if (compflags != NULL) { 2226 STRCPY(p, compflags); 2227 strcat(p, "/"); 2228 } 2229 strcat(p, items[1]); 2230 compflags = p; 2231 } 2232 } else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2) 2233 && compmax == 0) { 2234 compmax = atoi(items[1]); 2235 if (compmax == 0) { 2236 smsg(0, _("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), 2237 fname, lnum, items[1]); 2238 } 2239 } else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2) 2240 && compminlen == 0) { 2241 compminlen = atoi(items[1]); 2242 if (compminlen == 0) { 2243 smsg(0, _("Wrong COMPOUNDMIN value in %s line %d: %s"), 2244 fname, lnum, items[1]); 2245 } 2246 } else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2) 2247 && compsylmax == 0) { 2248 compsylmax = atoi(items[1]); 2249 if (compsylmax == 0) { 2250 smsg(0, _("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), 2251 fname, lnum, items[1]); 2252 } 2253 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1)) { 2254 compoptions |= COMP_CHECKDUP; 2255 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1)) { 2256 compoptions |= COMP_CHECKREP; 2257 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1)) { 2258 compoptions |= COMP_CHECKCASE; 2259 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1)) { 2260 compoptions |= COMP_CHECKTRIPLE; 2261 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2)) { 2262 if (atoi(items[1]) == 0) { 2263 smsg(0, _("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), 2264 fname, lnum, items[1]); 2265 } 2266 } else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3)) { 2267 garray_T *gap = &spin->si_comppat; 2268 int i; 2269 2270 // Only add the couple if it isn't already there. 
2271 for (i = 0; i < gap->ga_len - 1; i += 2) { 2272 if (strcmp(((char **)(gap->ga_data))[i], items[1]) == 0 2273 && strcmp(((char **)(gap->ga_data))[i + 1], items[2]) == 0) { 2274 break; 2275 } 2276 } 2277 if (i >= gap->ga_len) { 2278 ga_grow(gap, 2); 2279 ((char **)(gap->ga_data))[gap->ga_len++] = getroom_save(spin, items[1]); 2280 ((char **)(gap->ga_data))[gap->ga_len++] = getroom_save(spin, items[2]); 2281 } 2282 } else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2) 2283 && syllable == NULL) { 2284 syllable = getroom_save(spin, items[1]); 2285 } else if (is_aff_rule(items, itemcnt, "NOBREAK", 1)) { 2286 spin->si_nobreak = true; 2287 } else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1)) { 2288 spin->si_nosplitsugs = true; 2289 } else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1)) { 2290 spin->si_nocompoundsugs = true; 2291 } else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1)) { 2292 spin->si_nosugfile = true; 2293 } else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1)) { 2294 aff->af_pfxpostpone = true; 2295 } else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1)) { 2296 aff->af_ignoreextra = true; 2297 } else if ((strcmp(items[0], "PFX") == 0 2298 || strcmp(items[0], "SFX") == 0) 2299 && aff_todo == 0 2300 && itemcnt >= 4) { 2301 int lasti = 4; 2302 char key[AH_KEY_LEN]; 2303 2304 if (*items[0] == 'P') { 2305 tp = &aff->af_pref; 2306 } else { 2307 tp = &aff->af_suff; 2308 } 2309 2310 // Myspell allows the same affix name to be used multiple 2311 // times. The affix files that do this have an undocumented 2312 // "S" flag on all but the last block, thus we check for that 2313 // and store it in ah_follows. 
2314 xstrlcpy(key, items[1], AH_KEY_LEN); 2315 hi = hash_find(tp, key); 2316 if (!HASHITEM_EMPTY(hi)) { 2317 cur_aff = HI2AH(hi); 2318 if (cur_aff->ah_combine != (*items[2] == 'Y')) { 2319 smsg(0, _("Different combining flag in continued affix block in %s line %d: %s"), 2320 fname, lnum, items[1]); 2321 } 2322 if (!cur_aff->ah_follows) { 2323 smsg(0, _("Duplicate affix in %s line %d: %s"), 2324 fname, lnum, items[1]); 2325 } 2326 } else { 2327 // New affix letter. 2328 cur_aff = getroom(spin, sizeof(*cur_aff), true); 2329 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], fname, lnum); 2330 if (cur_aff->ah_flag == 0 || strlen(items[1]) >= AH_KEY_LEN) { 2331 break; 2332 } 2333 if (cur_aff->ah_flag == aff->af_bad 2334 || cur_aff->ah_flag == aff->af_rare 2335 || cur_aff->ah_flag == aff->af_keepcase 2336 || cur_aff->ah_flag == aff->af_needaffix 2337 || cur_aff->ah_flag == aff->af_circumfix 2338 || cur_aff->ah_flag == aff->af_nosuggest 2339 || cur_aff->ah_flag == aff->af_needcomp 2340 || cur_aff->ah_flag == aff->af_comproot) { 2341 smsg(0, _("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST " 2342 "in %s line %d: %s"), 2343 fname, lnum, items[1]); 2344 } 2345 STRCPY(cur_aff->ah_key, items[1]); 2346 hash_add(tp, cur_aff->ah_key); 2347 2348 cur_aff->ah_combine = (*items[2] == 'Y'); 2349 } 2350 2351 // Check for the "S" flag, which apparently means that another 2352 // block with the same affix name is following. 2353 if (itemcnt > lasti && strcmp(items[lasti], "S") == 0) { 2354 lasti++; 2355 cur_aff->ah_follows = true; 2356 } else { 2357 cur_aff->ah_follows = false; 2358 } 2359 2360 // Myspell allows extra text after the item, but that might 2361 // mean mistakes go unnoticed. Require a comment-starter, 2362 // unless IGNOREEXTRA is used. Hunspell uses a "-" item. 
2363 if (itemcnt > lasti 2364 && !aff->af_ignoreextra 2365 && *items[lasti] != '#') { 2366 smsg(0, _(e_afftrailing), fname, lnum, items[lasti]); 2367 } 2368 2369 if (strcmp(items[2], "Y") != 0 && strcmp(items[2], "N") != 0) { 2370 smsg(0, _("Expected Y or N in %s line %d: %s"), 2371 fname, lnum, items[2]); 2372 } 2373 2374 if (*items[0] == 'P' && aff->af_pfxpostpone) { 2375 if (cur_aff->ah_newID == 0) { 2376 // Use a new number in the .spl file later, to be able 2377 // to handle multiple .aff files. 2378 check_renumber(spin); 2379 cur_aff->ah_newID = ++spin->si_newprefID; 2380 2381 // We only really use ah_newID if the prefix is 2382 // postponed. We know that only after handling all 2383 // the items. 2384 did_postpone_prefix = false; 2385 } else { 2386 // Did use the ID in a previous block. 2387 did_postpone_prefix = true; 2388 } 2389 } 2390 2391 aff_todo = atoi(items[3]); 2392 } else if ((strcmp(items[0], "PFX") == 0 2393 || strcmp(items[0], "SFX") == 0) 2394 && aff_todo > 0 2395 && strcmp(cur_aff->ah_key, items[1]) == 0 2396 && itemcnt >= 5) { 2397 affentry_T *aff_entry; 2398 int lasti = 5; 2399 2400 // Myspell allows extra text after the item, but that might 2401 // mean mistakes go unnoticed. Require a comment-starter. 2402 // Hunspell uses a "-" item. 2403 if (itemcnt > lasti && *items[lasti] != '#' 2404 && (strcmp(items[lasti], "-") != 0 2405 || itemcnt != lasti + 1)) { 2406 smsg(0, _(e_afftrailing), fname, lnum, items[lasti]); 2407 } 2408 2409 // New item for an affix letter. 
2410 aff_todo--; 2411 aff_entry = getroom(spin, sizeof(*aff_entry), true); 2412 2413 if (strcmp(items[2], "0") != 0) { 2414 aff_entry->ae_chop = getroom_save(spin, items[2]); 2415 } 2416 if (strcmp(items[3], "0") != 0) { 2417 aff_entry->ae_add = getroom_save(spin, items[3]); 2418 2419 // Recognize flags on the affix: abcd/XYZ 2420 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); 2421 if (aff_entry->ae_flags != NULL) { 2422 *aff_entry->ae_flags++ = NUL; 2423 aff_process_flags(aff, aff_entry); 2424 } 2425 } 2426 2427 // Don't use an affix entry with non-ASCII characters when 2428 // "spin->si_ascii" is true. 2429 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) 2430 || has_non_ascii(aff_entry->ae_add))) { 2431 aff_entry->ae_next = cur_aff->ah_first; 2432 cur_aff->ah_first = aff_entry; 2433 2434 if (strcmp(items[4], ".") != 0) { 2435 char buf[MAXLINELEN]; 2436 2437 aff_entry->ae_cond = getroom_save(spin, items[4]); 2438 snprintf(buf, sizeof(buf), *items[0] == 'P' ? "^%s" : "%s$", items[4]); 2439 aff_entry->ae_prog = vim_regcomp(buf, RE_MAGIC + RE_STRING + RE_STRICT); 2440 if (aff_entry->ae_prog == NULL) { 2441 smsg(0, _("Broken condition in %s line %d: %s"), 2442 fname, lnum, items[4]); 2443 } 2444 } 2445 2446 // For postponed prefixes we need an entry in si_prefcond 2447 // for the condition. Use an existing one if possible. 2448 // Can't be done for an affix with flags, ignoring 2449 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. 2450 if (*items[0] == 'P' && aff->af_pfxpostpone 2451 && aff_entry->ae_flags == NULL) { 2452 bool upper = false; 2453 // When the chop string is one lower-case letter and 2454 // the add string ends in the upper-case letter we set 2455 // the "upper" flag, clear "ae_chop" and remove the 2456 // letters from "ae_add". The condition must either 2457 // be empty or start with the same letter. 
2458 if (aff_entry->ae_chop != NULL 2459 && aff_entry->ae_add != NULL 2460 && aff_entry->ae_chop[utfc_ptr2len(aff_entry->ae_chop)] == 2461 NUL) { 2462 int c = utf_ptr2char(aff_entry->ae_chop); 2463 int c_up = SPELL_TOUPPER(c); 2464 if (c_up != c 2465 && (aff_entry->ae_cond == NULL 2466 || utf_ptr2char(aff_entry->ae_cond) == c)) { 2467 p = aff_entry->ae_add + strlen(aff_entry->ae_add); 2468 MB_PTR_BACK(aff_entry->ae_add, p); 2469 if (utf_ptr2char(p) == c_up) { 2470 upper = true; 2471 aff_entry->ae_chop = NULL; 2472 *p = NUL; 2473 2474 // The condition is matched with the 2475 // actual word, thus must check for the 2476 // upper-case letter. 2477 if (aff_entry->ae_cond != NULL) { 2478 char buf[MAXLINELEN]; 2479 onecap_copy(items[4], buf, true); 2480 aff_entry->ae_cond = getroom_save(spin, buf); 2481 if (aff_entry->ae_cond != NULL) { 2482 snprintf(buf, MAXLINELEN, "^%s", aff_entry->ae_cond); 2483 vim_regfree(aff_entry->ae_prog); 2484 aff_entry->ae_prog = vim_regcomp(buf, RE_MAGIC + RE_STRING); 2485 } 2486 } 2487 } 2488 } 2489 } 2490 2491 if (aff_entry->ae_chop == NULL) { 2492 int idx; 2493 2494 // Find a previously used condition. 2495 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; idx--) { 2496 p = ((char **)spin->si_prefcond.ga_data)[idx]; 2497 if (str_equal(p, aff_entry->ae_cond)) { 2498 break; 2499 } 2500 } 2501 if (idx < 0) { 2502 // Not found, add a new condition. 2503 idx = spin->si_prefcond.ga_len; 2504 char **pp = GA_APPEND_VIA_PTR(char *, &spin->si_prefcond); 2505 *pp = (aff_entry->ae_cond == NULL) 2506 ? NULL : getroom_save(spin, aff_entry->ae_cond); 2507 } 2508 2509 // Add the prefix to the prefix tree. 2510 if (aff_entry->ae_add == NULL) { 2511 p = ""; 2512 } else { 2513 p = aff_entry->ae_add; 2514 } 2515 2516 // PFX_FLAGS is a negative number, so that 2517 // tree_add_word() knows this is the prefix tree. 
2518 int n = PFX_FLAGS; 2519 if (!cur_aff->ah_combine) { 2520 n |= WFP_NC; 2521 } 2522 if (upper) { 2523 n |= WFP_UP; 2524 } 2525 if (aff_entry->ae_comppermit) { 2526 n |= WFP_COMPPERMIT; 2527 } 2528 if (aff_entry->ae_compforbid) { 2529 n |= WFP_COMPFORBID; 2530 } 2531 tree_add_word(spin, p, spin->si_prefroot, n, 2532 idx, cur_aff->ah_newID); 2533 did_postpone_prefix = true; 2534 } 2535 2536 // Didn't actually use ah_newID, backup si_newprefID. 2537 if (aff_todo == 0 && !did_postpone_prefix) { 2538 spin->si_newprefID--; 2539 cur_aff->ah_newID = 0; 2540 } 2541 } 2542 } 2543 } else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL) { 2544 fol = xstrdup(items[1]); 2545 } else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL) { 2546 low = xstrdup(items[1]); 2547 } else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL) { 2548 upp = xstrdup(items[1]); 2549 } else if (is_aff_rule(items, itemcnt, "REP", 2) 2550 || is_aff_rule(items, itemcnt, "REPSAL", 2)) { 2551 // Ignore REP/REPSAL count 2552 if (!isdigit((uint8_t)(*items[1]))) { 2553 smsg(0, _("Expected REP(SAL) count in %s line %d"), 2554 fname, lnum); 2555 } 2556 } else if ((strcmp(items[0], "REP") == 0 2557 || strcmp(items[0], "REPSAL") == 0) 2558 && itemcnt >= 3) { 2559 // REP/REPSAL item 2560 // Myspell ignores extra arguments, we require it starts with 2561 // # to detect mistakes. 2562 if (itemcnt > 3 && items[3][0] != '#') { 2563 smsg(0, _(e_afftrailing), fname, lnum, items[3]); 2564 } 2565 if (items[0][3] == 'S' ? do_repsal : do_rep) { 2566 // Replace underscore with space (can't include a space 2567 // directly). 2568 for (p = items[1]; *p != NUL; MB_PTR_ADV(p)) { 2569 if (*p == '_') { 2570 *p = ' '; 2571 } 2572 } 2573 for (p = items[2]; *p != NUL; MB_PTR_ADV(p)) { 2574 if (*p == '_') { 2575 *p = ' '; 2576 } 2577 } 2578 add_fromto(spin, items[0][3] == 'S' 2579 ? 
&spin->si_repsal 2580 : &spin->si_rep, items[1], items[2]); 2581 } 2582 } else if (is_aff_rule(items, itemcnt, "MAP", 2)) { 2583 // MAP item or count 2584 if (!found_map) { 2585 // First line contains the count. 2586 found_map = true; 2587 if (!isdigit((uint8_t)(*items[1]))) { 2588 smsg(0, _("Expected MAP count in %s line %d"), 2589 fname, lnum); 2590 } 2591 } else if (do_mapline) { 2592 // Check that every character appears only once. 2593 for (p = items[1]; *p != NUL;) { 2594 int c = mb_ptr2char_adv((const char **)&p); 2595 if ((!GA_EMPTY(&spin->si_map) 2596 && vim_strchr(spin->si_map.ga_data, c) 2597 != NULL) 2598 || vim_strchr(p, c) != NULL) { 2599 smsg(0, _("Duplicate character in MAP in %s line %d"), 2600 fname, lnum); 2601 } 2602 } 2603 2604 // We simply concatenate all the MAP strings, separated by 2605 // slashes. 2606 ga_concat(&spin->si_map, items[1]); 2607 ga_append(&spin->si_map, '/'); 2608 } 2609 } 2610 // Accept "SAL from to" and "SAL from to #comment". 2611 else if (is_aff_rule(items, itemcnt, "SAL", 3)) { 2612 if (do_sal) { 2613 // SAL item (sounds-a-like) 2614 // Either one of the known keys or a from-to pair. 2615 if (strcmp(items[1], "followup") == 0) { 2616 spin->si_followup = sal_to_bool(items[2]); 2617 } else if (strcmp(items[1], "collapse_result") == 0) { 2618 spin->si_collapse = sal_to_bool(items[2]); 2619 } else if (strcmp(items[1], "remove_accents") == 0) { 2620 spin->si_rem_accents = sal_to_bool(items[2]); 2621 } else { 2622 // when "to" is "_" it means empty 2623 add_fromto(spin, &spin->si_sal, items[1], 2624 strcmp(items[2], "_") == 0 ? 
"" 2625 : items[2]); 2626 } 2627 } 2628 } else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2) 2629 && sofofrom == NULL) { 2630 sofofrom = getroom_save(spin, items[1]); 2631 } else if (is_aff_rule(items, itemcnt, "SOFOTO", 2) 2632 && sofoto == NULL) { 2633 sofoto = getroom_save(spin, items[1]); 2634 } else if (strcmp(items[0], "COMMON") == 0) { 2635 for (int i = 1; i < itemcnt; i++) { 2636 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, items[i]))) { 2637 p = xstrdup(items[i]); 2638 hash_add(&spin->si_commonwords, p); 2639 } 2640 } 2641 } else { 2642 smsg(0, _("Unrecognized or duplicate item in %s line %d: %s"), 2643 fname, lnum, items[0]); 2644 } 2645 } 2646 } 2647 2648 if (fol != NULL || low != NULL || upp != NULL) { 2649 if (spin->si_clear_chartab) { 2650 // Clear the char type tables, don't want to use any of the 2651 // currently used spell properties. 2652 init_spell_chartab(); 2653 spin->si_clear_chartab = false; 2654 } 2655 2656 xfree(fol); 2657 xfree(low); 2658 xfree(upp); 2659 } 2660 2661 // Use compound specifications of the .aff file for the spell info. 2662 if (compmax != 0) { 2663 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); 2664 spin->si_compmax = compmax; 2665 } 2666 2667 if (compminlen != 0) { 2668 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); 2669 spin->si_compminlen = compminlen; 2670 } 2671 2672 if (compsylmax != 0) { 2673 if (syllable == NULL) { 2674 smsg(0, "%s", _("COMPOUNDSYLMAX used without SYLLABLE")); 2675 } 2676 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); 2677 spin->si_compsylmax = compsylmax; 2678 } 2679 2680 if (compoptions != 0) { 2681 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); 2682 spin->si_compoptions |= compoptions; 2683 } 2684 2685 if (compflags != NULL) { 2686 process_compflags(spin, aff, compflags); 2687 } 2688 2689 // Check that we didn't use too many renumbered flags. 
2690 if (spin->si_newcompID < spin->si_newprefID) { 2691 if (spin->si_newcompID == 127 || spin->si_newcompID == 255) { 2692 msg(_("Too many postponed prefixes"), 0); 2693 } else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) { 2694 msg(_("Too many compound flags"), 0); 2695 } else { 2696 msg(_("Too many postponed prefixes and/or compound flags"), 0); 2697 } 2698 } 2699 2700 if (syllable != NULL) { 2701 aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); 2702 spin->si_syllable = syllable; 2703 } 2704 2705 if (sofofrom != NULL || sofoto != NULL) { 2706 if (sofofrom == NULL || sofoto == NULL) { 2707 smsg(0, _("Missing SOFO%s line in %s"), 2708 sofofrom == NULL ? "FROM" : "TO", fname); 2709 } else if (!GA_EMPTY(&spin->si_sal)) { 2710 smsg(0, _("Both SAL and SOFO lines in %s"), fname); 2711 } else { 2712 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); 2713 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); 2714 spin->si_sofofr = sofofrom; 2715 spin->si_sofoto = sofoto; 2716 } 2717 } 2718 2719 if (midword != NULL) { 2720 aff_check_string(spin->si_midword, midword, "MIDWORD"); 2721 spin->si_midword = midword; 2722 } 2723 2724 xfree(pc); 2725 fclose(fd); 2726 return aff; 2727 } 2728 2729 /// @return true when items[0] equals "rulename", there are "mincount" items or 2730 /// a comment is following after item "mincount". 2731 static bool is_aff_rule(char **items, int itemcnt, char *rulename, int mincount) 2732 { 2733 return strcmp(items[0], rulename) == 0 2734 && (itemcnt == mincount 2735 || (itemcnt > mincount && items[mincount][0] == '#')); 2736 } 2737 2738 // For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from 2739 // ae_flags to ae_comppermit and ae_compforbid. 
2740 static void aff_process_flags(afffile_T *affile, affentry_T *entry) 2741 { 2742 if (entry->ae_flags != NULL 2743 && (affile->af_compforbid != 0 || affile->af_comppermit != 0)) { 2744 for (char *p = entry->ae_flags; *p != NUL;) { 2745 char *prevp = p; 2746 unsigned flag = get_affitem(affile->af_flagtype, &p); 2747 if (flag == affile->af_comppermit || flag == affile->af_compforbid) { 2748 STRMOVE(prevp, p); 2749 p = prevp; 2750 if (flag == affile->af_comppermit) { 2751 entry->ae_comppermit = true; 2752 } else { 2753 entry->ae_compforbid = true; 2754 } 2755 } 2756 if (affile->af_flagtype == AFT_NUM && *p == ',') { 2757 p++; 2758 } 2759 } 2760 if (*entry->ae_flags == NUL) { 2761 entry->ae_flags = NULL; // nothing left 2762 } 2763 } 2764 } 2765 2766 /// @return true if "s" is the name of an info item in the affix file. 2767 static bool spell_info_item(char *s) 2768 { 2769 return strcmp(s, "NAME") == 0 2770 || strcmp(s, "HOME") == 0 2771 || strcmp(s, "VERSION") == 0 2772 || strcmp(s, "AUTHOR") == 0 2773 || strcmp(s, "EMAIL") == 0 2774 || strcmp(s, "COPYRIGHT") == 0; 2775 } 2776 2777 // Turn an affix flag name into a number, according to the FLAG type. 2778 // returns zero for failure. 2779 static unsigned affitem2flag(int flagtype, char *item, char *fname, int lnum) 2780 { 2781 char *p = item; 2782 2783 unsigned res = get_affitem(flagtype, &p); 2784 if (res == 0) { 2785 if (flagtype == AFT_NUM) { 2786 smsg(0, _("Flag is not a number in %s line %d: %s"), 2787 fname, lnum, item); 2788 } else { 2789 smsg(0, _("Illegal flag in %s line %d: %s"), 2790 fname, lnum, item); 2791 } 2792 } 2793 if (*p != NUL) { 2794 smsg(0, _(e_affname), fname, lnum, item); 2795 return 0; 2796 } 2797 2798 return res; 2799 } 2800 2801 // Get one affix name from "*pp" and advance the pointer. 2802 // Returns ZERO_FLAG for "0". 2803 // Returns zero for an error, still advances the pointer then. 
2804 static unsigned get_affitem(int flagtype, char **pp) 2805 { 2806 int res; 2807 2808 if (flagtype == AFT_NUM) { 2809 if (!ascii_isdigit(**pp)) { 2810 (*pp)++; // always advance, avoid getting stuck 2811 return 0; 2812 } 2813 res = getdigits_int(pp, true, 0); 2814 if (res == 0) { 2815 res = ZERO_FLAG; 2816 } 2817 } else { 2818 res = mb_ptr2char_adv((const char **)pp); 2819 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG 2820 && res >= 'A' && res <= 'Z')) { 2821 if (**pp == NUL) { 2822 return 0; 2823 } 2824 res = mb_ptr2char_adv((const char **)pp) + (res << 16); 2825 } 2826 } 2827 return (unsigned)res; 2828 } 2829 2830 /// Process the "compflags" string used in an affix file and append it to 2831 /// spin->si_compflags. 2832 /// The processing involves changing the affix names to ID numbers, so that 2833 /// they fit in one byte. 2834 static void process_compflags(spellinfo_T *spin, afffile_T *aff, char *compflags) 2835 { 2836 compitem_T *ci; 2837 int id; 2838 char key[AH_KEY_LEN]; 2839 2840 // Make room for the old and the new compflags, concatenated with a / in 2841 // between. Processing it makes it shorter, but we don't know by how 2842 // much, thus allocate the maximum. 2843 int len = (int)strlen(compflags) + 1; 2844 if (spin->si_compflags != NULL) { 2845 len += (int)strlen(spin->si_compflags) + 1; 2846 } 2847 char *p = getroom(spin, (size_t)len, false); 2848 if (spin->si_compflags != NULL) { 2849 STRCPY(p, spin->si_compflags); 2850 strcat(p, "/"); 2851 } 2852 spin->si_compflags = p; 2853 uint8_t *tp = (uint8_t *)p + strlen(p); 2854 2855 for (p = compflags; *p != NUL;) { 2856 if (vim_strchr("/?*+[]", (uint8_t)(*p)) != NULL) { 2857 // Copy non-flag characters directly. 2858 *tp++ = (uint8_t)(*p++); 2859 } else { 2860 // First get the flag number, also checks validity. 2861 char *prevp = p; 2862 unsigned flag = get_affitem(aff->af_flagtype, &p); 2863 if (flag != 0) { 2864 // Find the flag in the hashtable. 
If it was used before, use 2865 // the existing ID. Otherwise add a new entry. 2866 xmemcpyz(key, prevp, (size_t)(p - prevp)); 2867 hashitem_T *hi = hash_find(&aff->af_comp, key); 2868 if (!HASHITEM_EMPTY(hi)) { 2869 id = HI2CI(hi)->ci_newID; 2870 } else { 2871 ci = getroom(spin, sizeof(compitem_T), true); 2872 STRCPY(ci->ci_key, key); 2873 ci->ci_flag = flag; 2874 // Avoid using a flag ID that has a special meaning in a 2875 // regexp (also inside []). 2876 do { 2877 check_renumber(spin); 2878 id = spin->si_newcompID--; 2879 } while (vim_strchr("/?*+[]\\-^", id) != NULL); 2880 ci->ci_newID = id; 2881 hash_add(&aff->af_comp, ci->ci_key); 2882 } 2883 *tp++ = (uint8_t)id; 2884 } 2885 if (aff->af_flagtype == AFT_NUM && *p == ',') { 2886 p++; 2887 } 2888 } 2889 } 2890 2891 *tp = NUL; 2892 } 2893 2894 // Check that the new IDs for postponed affixes and compounding don't overrun 2895 // each other. We have almost 255 available, but start at 0-127 to avoid 2896 // using two bytes for utf-8. When the 0-127 range is used up go to 128-255. 2897 // When that is used up an error message is given. 2898 static void check_renumber(spellinfo_T *spin) 2899 { 2900 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) { 2901 spin->si_newprefID = 127; 2902 spin->si_newcompID = 255; 2903 } 2904 } 2905 2906 // Returns true if flag "flag" appears in affix list "afflist". 
2907 static bool flag_in_afflist(int flagtype, char *afflist, unsigned flag) 2908 { 2909 switch (flagtype) { 2910 case AFT_CHAR: 2911 return vim_strchr(afflist, (int)flag) != NULL; 2912 2913 case AFT_CAPLONG: 2914 case AFT_LONG: 2915 for (char *p = afflist; *p != NUL;) { 2916 unsigned n = (unsigned)mb_ptr2char_adv((const char **)&p); 2917 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) 2918 && *p != NUL) { 2919 n = (unsigned)mb_ptr2char_adv((const char **)&p) + (n << 16); 2920 } 2921 if (n == flag) { 2922 return true; 2923 } 2924 } 2925 break; 2926 2927 case AFT_NUM: 2928 for (char *p = afflist; *p != NUL;) { 2929 int digits = getdigits_int(&p, true, 0); 2930 assert(digits >= 0); 2931 unsigned n = (unsigned)digits; 2932 if (n == 0) { 2933 n = ZERO_FLAG; 2934 } 2935 if (n == flag) { 2936 return true; 2937 } 2938 if (*p != NUL) { // skip over comma 2939 p++; 2940 } 2941 } 2942 break; 2943 } 2944 return false; 2945 } 2946 2947 // Give a warning when "spinval" and "affval" numbers are set and not the same. 2948 static void aff_check_number(int spinval, int affval, char *name) 2949 { 2950 if (spinval != 0 && spinval != affval) { 2951 smsg(0, _("%s value differs from what is used in another .aff file"), 2952 name); 2953 } 2954 } 2955 2956 /// Give a warning when "spinval" and "affval" strings are set and not the same. 2957 static void aff_check_string(char *spinval, char *affval, char *name) 2958 { 2959 if (spinval != NULL && strcmp(spinval, affval) != 0) { 2960 smsg(0, _("%s value differs from what is used in another .aff file"), 2961 name); 2962 } 2963 } 2964 2965 /// @return true if strings "s1" and "s2" are equal. Also consider both being 2966 /// NULL as equal. 2967 static bool str_equal(char *s1, char *s2) 2968 { 2969 if (s1 == NULL || s2 == NULL) { 2970 return s1 == s2; 2971 } 2972 return strcmp(s1, s2) == 0; 2973 } 2974 2975 /// Add a from-to item to "gap". Used for REP and SAL items. 2976 /// They are stored case-folded. 
2977 static void add_fromto(spellinfo_T *spin, garray_T *gap, char *from, char *to) 2978 { 2979 char word[MAXWLEN]; 2980 2981 fromto_T *ftp = GA_APPEND_VIA_PTR(fromto_T, gap); 2982 spell_casefold(curwin, from, (int)strlen(from), word, MAXWLEN); 2983 ftp->ft_from = getroom_save(spin, word); 2984 spell_casefold(curwin, to, (int)strlen(to), word, MAXWLEN); 2985 ftp->ft_to = getroom_save(spin, word); 2986 } 2987 2988 /// Converts a boolean argument in a SAL line to true or false; 2989 static bool sal_to_bool(char *s) 2990 { 2991 return strcmp(s, "1") == 0 || strcmp(s, "true") == 0; 2992 } 2993 2994 // Free the structure filled by spell_read_aff(). 2995 static void spell_free_aff(afffile_T *aff) 2996 { 2997 xfree(aff->af_enc); 2998 2999 // All this trouble to free the "ae_prog" items... 3000 for (hashtab_T *ht = &aff->af_pref;; ht = &aff->af_suff) { 3001 int todo = (int)ht->ht_used; 3002 for (hashitem_T *hi = ht->ht_array; todo > 0; hi++) { 3003 if (!HASHITEM_EMPTY(hi)) { 3004 todo--; 3005 affheader_T *ah = HI2AH(hi); 3006 for (affentry_T *ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { 3007 vim_regfree(ae->ae_prog); 3008 } 3009 } 3010 } 3011 if (ht == &aff->af_suff) { 3012 break; 3013 } 3014 } 3015 3016 hash_clear(&aff->af_pref); 3017 hash_clear(&aff->af_suff); 3018 hash_clear(&aff->af_comp); 3019 } 3020 3021 // Read dictionary file "fname". 3022 // Returns OK or FAIL; 3023 static int spell_read_dic(spellinfo_T *spin, char *fname, afffile_T *affile) 3024 { 3025 hashtab_T ht; 3026 char line[MAXLINELEN]; 3027 char store_afflist[MAXWLEN]; 3028 char *pc; 3029 char *w; 3030 int lnum = 1; 3031 int non_ascii = 0; 3032 int retval = OK; 3033 char message[MAXLINELEN + MAXWLEN]; 3034 int duplicate = 0; 3035 Timestamp last_msg_time = 0; 3036 3037 // Open the file. 3038 FILE *fd = os_fopen(fname, "r"); 3039 if (fd == NULL) { 3040 semsg(_(e_notopen), fname); 3041 return FAIL; 3042 } 3043 3044 // The hashtable is only used to detect duplicated words. 
3045 hash_init(&ht); 3046 3047 vim_snprintf(IObuff, IOSIZE, 3048 _("Reading dictionary file %s..."), fname); 3049 spell_message(spin, IObuff); 3050 3051 // start with a message for the first line 3052 spin->si_msg_count = 999999; 3053 3054 // Read and ignore the first line: word count. 3055 if (vim_fgets(line, MAXLINELEN, fd) || !ascii_isdigit(*skipwhite(line))) { 3056 semsg(_("E760: No word count in %s"), fname); 3057 } 3058 3059 // Read all the lines in the file one by one. 3060 // The words are converted to 'encoding' here, before being added to 3061 // the hashtable. 3062 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) { 3063 line_breakcheck(); 3064 lnum++; 3065 if (line[0] == '#' || line[0] == '/') { 3066 continue; // comment line 3067 } 3068 // Remove CR, LF and white space from the end. White space halfway through 3069 // the word is kept to allow multi-word terms like "et al.". 3070 int l = (int)strlen(line); 3071 while (l > 0 && (uint8_t)line[l - 1] <= ' ') { 3072 l--; 3073 } 3074 if (l == 0) { 3075 continue; // empty line 3076 } 3077 line[l] = NUL; 3078 3079 // Convert from "SET" to 'encoding' when needed. 3080 if (spin->si_conv.vc_type != CONV_NONE) { 3081 pc = string_convert(&spin->si_conv, line, NULL); 3082 if (pc == NULL) { 3083 smsg(0, _("Conversion failure for word in %s line %d: %s"), 3084 fname, lnum, line); 3085 continue; 3086 } 3087 w = pc; 3088 } else { 3089 pc = NULL; 3090 w = line; 3091 } 3092 3093 // Truncate the word at the "/", set "afflist" to what follows. 3094 // Replace "\/" by "/" and "\\" by "\". 3095 char *afflist = NULL; 3096 for (char *p = w; *p != NUL; MB_PTR_ADV(p)) { 3097 if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) { 3098 STRMOVE(p, p + 1); 3099 } else if (*p == '/') { 3100 *p = NUL; 3101 afflist = p + 1; 3102 break; 3103 } 3104 } 3105 3106 // Skip non-ASCII words when "spin->si_ascii" is true. 
3107 if (spin->si_ascii && has_non_ascii(w)) { 3108 non_ascii++; 3109 xfree(pc); 3110 continue; 3111 } 3112 3113 // This takes time, print a message every 10000 words, but not more 3114 // often than once per second. 3115 if (spin->si_verbose && spin->si_msg_count > 10000) { 3116 spin->si_msg_count = 0; 3117 if (os_time() > last_msg_time) { 3118 last_msg_time = os_time(); 3119 vim_snprintf(message, sizeof(message), 3120 _("line %6d, word %6d - %s"), 3121 lnum, spin->si_foldwcount + spin->si_keepwcount, w); 3122 msg_start(); 3123 msg_outtrans_long(message, 0); 3124 msg_clr_eos(); 3125 msg_didout = false; 3126 msg_col = 0; 3127 ui_flush(); 3128 } 3129 } 3130 3131 // Store the word in the hashtable to be able to find duplicates. 3132 char *dw = getroom_save(spin, w); 3133 if (dw == NULL) { 3134 retval = FAIL; 3135 xfree(pc); 3136 break; 3137 } 3138 3139 hash_T hash = hash_hash(dw); 3140 hashitem_T *hi = hash_lookup(&ht, dw, strlen(dw), hash); 3141 if (!HASHITEM_EMPTY(hi)) { 3142 if (p_verbose > 0) { 3143 smsg(0, _("Duplicate word in %s line %d: %s"), 3144 fname, lnum, dw); 3145 } else if (duplicate == 0) { 3146 smsg(0, _("First duplicate word in %s line %d: %s"), 3147 fname, lnum, dw); 3148 } 3149 duplicate++; 3150 } else { 3151 hash_add_item(&ht, hi, dw, hash); 3152 } 3153 3154 int flags = 0; 3155 store_afflist[0] = NUL; 3156 int pfxlen = 0; 3157 bool need_affix = false; 3158 if (afflist != NULL) { 3159 // Extract flags from the affix list. 3160 flags |= get_affix_flags(affile, afflist); 3161 3162 if (affile->af_needaffix != 0 3163 && flag_in_afflist(affile->af_flagtype, afflist, 3164 affile->af_needaffix)) { 3165 need_affix = true; 3166 } 3167 3168 if (affile->af_pfxpostpone) { 3169 // Need to store the list of prefix IDs with the word. 3170 pfxlen = get_pfxlist(affile, afflist, store_afflist); 3171 } 3172 3173 if (spin->si_compflags != NULL) { 3174 // Need to store the list of compound flags with the word. 3175 // Concatenate them to the list of prefix IDs. 
3176 get_compflags(affile, afflist, store_afflist + pfxlen); 3177 } 3178 } 3179 3180 // Add the word to the word tree(s). 3181 if (store_word(spin, dw, flags, spin->si_region, 3182 store_afflist, need_affix) == FAIL) { 3183 retval = FAIL; 3184 } 3185 3186 if (afflist != NULL) { 3187 // Find all matching suffixes and add the resulting words. 3188 // Additionally do matching prefixes that combine. 3189 if (store_aff_word(spin, dw, afflist, affile, 3190 &affile->af_suff, &affile->af_pref, 3191 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) { 3192 retval = FAIL; 3193 } 3194 3195 // Find all matching prefixes and add the resulting words. 3196 if (store_aff_word(spin, dw, afflist, affile, 3197 &affile->af_pref, NULL, 3198 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL) { 3199 retval = FAIL; 3200 } 3201 } 3202 3203 xfree(pc); 3204 } 3205 3206 if (duplicate > 0) { 3207 smsg(0, _("%d duplicate word(s) in %s"), duplicate, fname); 3208 } 3209 if (spin->si_ascii && non_ascii > 0) { 3210 smsg(0, _("Ignored %d word(s) with non-ASCII characters in %s"), 3211 non_ascii, fname); 3212 } 3213 hash_clear(&ht); 3214 3215 fclose(fd); 3216 return retval; 3217 } 3218 3219 // Check for affix flags in "afflist" that are turned into word flags. 3220 // Return WF_ flags. 
3221 static int get_affix_flags(afffile_T *affile, char *afflist) 3222 { 3223 int flags = 0; 3224 3225 if (affile->af_keepcase != 0 3226 && flag_in_afflist(affile->af_flagtype, afflist, 3227 affile->af_keepcase)) { 3228 flags |= WF_KEEPCAP | WF_FIXCAP; 3229 } 3230 if (affile->af_rare != 0 3231 && flag_in_afflist(affile->af_flagtype, afflist, affile->af_rare)) { 3232 flags |= WF_RARE; 3233 } 3234 if (affile->af_bad != 0 3235 && flag_in_afflist(affile->af_flagtype, afflist, affile->af_bad)) { 3236 flags |= WF_BANNED; 3237 } 3238 if (affile->af_needcomp != 0 3239 && flag_in_afflist(affile->af_flagtype, afflist, 3240 affile->af_needcomp)) { 3241 flags |= WF_NEEDCOMP; 3242 } 3243 if (affile->af_comproot != 0 3244 && flag_in_afflist(affile->af_flagtype, afflist, 3245 affile->af_comproot)) { 3246 flags |= WF_COMPROOT; 3247 } 3248 if (affile->af_nosuggest != 0 3249 && flag_in_afflist(affile->af_flagtype, afflist, 3250 affile->af_nosuggest)) { 3251 flags |= WF_NOSUGGEST; 3252 } 3253 return flags; 3254 } 3255 3256 // Get the list of prefix IDs from the affix list "afflist". 3257 // Used for PFXPOSTPONE. 3258 // Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL 3259 // and return the number of affixes. 3260 static int get_pfxlist(afffile_T *affile, char *afflist, char *store_afflist) 3261 { 3262 int cnt = 0; 3263 char key[AH_KEY_LEN]; 3264 3265 for (char *p = afflist; *p != NUL;) { 3266 char *prevp = p; 3267 if (get_affitem(affile->af_flagtype, &p) != 0) { 3268 // A flag is a postponed prefix flag if it appears in "af_pref" 3269 // and its ID is not zero. 
3270 xmemcpyz(key, prevp, (size_t)(p - prevp)); 3271 hashitem_T *hi = hash_find(&affile->af_pref, key); 3272 if (!HASHITEM_EMPTY(hi)) { 3273 int id = HI2AH(hi)->ah_newID; 3274 if (id != 0) { 3275 store_afflist[cnt++] = (char)(uint8_t)id; 3276 } 3277 } 3278 } 3279 if (affile->af_flagtype == AFT_NUM && *p == ',') { 3280 p++; 3281 } 3282 } 3283 3284 store_afflist[cnt] = NUL; 3285 return cnt; 3286 } 3287 3288 // Get the list of compound IDs from the affix list "afflist" that are used 3289 // for compound words. 3290 // Puts the flags in "store_afflist[]". 3291 static void get_compflags(afffile_T *affile, char *afflist, char *store_afflist) 3292 { 3293 int cnt = 0; 3294 char key[AH_KEY_LEN]; 3295 3296 for (char *p = afflist; *p != NUL;) { 3297 char *prevp = p; 3298 if (get_affitem(affile->af_flagtype, &p) != 0) { 3299 // A flag is a compound flag if it appears in "af_comp". 3300 xmemcpyz(key, prevp, (size_t)(p - prevp)); 3301 hashitem_T *hi = hash_find(&affile->af_comp, key); 3302 if (!HASHITEM_EMPTY(hi)) { 3303 store_afflist[cnt++] = (char)(uint8_t)HI2CI(hi)->ci_newID; 3304 } 3305 } 3306 if (affile->af_flagtype == AFT_NUM && *p == ',') { 3307 p++; 3308 } 3309 } 3310 3311 store_afflist[cnt] = NUL; 3312 } 3313 3314 /// Apply affixes to a word and store the resulting words. 3315 /// "ht" is the hashtable with affentry_T that need to be applied, either 3316 /// prefixes or suffixes. 3317 /// "xht", when not NULL, is the prefix hashtable, to be used additionally on 3318 /// the resulting words for combining affixes. 3319 /// 3320 /// @param spin spell info 3321 /// @param word basic word start 3322 /// @param afflist list of names of supported affixes 3323 /// @param condit CONDIT_SUF et al. 3324 /// @param flags flags for the word 3325 /// @param pfxlist list of prefix IDs 3326 /// @param pfxlen nr of flags in "pfxlist" for prefixes, rest is compound flags 3327 /// 3328 /// @return FAIL when out of memory. 
3329 static int store_aff_word(spellinfo_T *spin, char *word, char *afflist, afffile_T *affile, 3330 hashtab_T *ht, hashtab_T *xht, int condit, int flags, char *pfxlist, 3331 int pfxlen) 3332 { 3333 affentry_T *ae; 3334 char newword[MAXWLEN]; 3335 int retval = OK; 3336 int j; 3337 char store_afflist[MAXWLEN]; 3338 char pfx_pfxlist[MAXWLEN]; 3339 size_t wordlen = strlen(word); 3340 3341 int todo = (int)ht->ht_used; 3342 for (hashitem_T *hi = ht->ht_array; todo > 0 && retval == OK; hi++) { 3343 if (!HASHITEM_EMPTY(hi)) { 3344 todo--; 3345 affheader_T *ah = HI2AH(hi); 3346 3347 // Check that the affix combines, if required, and that the word 3348 // supports this affix. 3349 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine) 3350 && flag_in_afflist(affile->af_flagtype, afflist, 3351 ah->ah_flag)) { 3352 // Loop over all affix entries with this name. 3353 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) { 3354 // Check the condition. It's not logical to match case 3355 // here, but it is required for compatibility with 3356 // Myspell. 3357 // Another requirement from Myspell is that the chop 3358 // string is shorter than the word itself. 3359 // For prefixes, when "PFXPOSTPONE" was used, only do 3360 // prefixes with a chop string and/or flags. 3361 // When a previously added affix had CIRCUMFIX this one 3362 // must have it too, if it had not then this one must not 3363 // have one either. 3364 if ((xht != NULL || !affile->af_pfxpostpone 3365 || ae->ae_chop != NULL 3366 || ae->ae_flags != NULL) 3367 && (ae->ae_chop == NULL 3368 || strlen(ae->ae_chop) < wordlen) 3369 && (ae->ae_prog == NULL 3370 || vim_regexec_prog(&ae->ae_prog, false, word, 0)) 3371 && (((condit & CONDIT_CFIX) == 0) 3372 == ((condit & CONDIT_AFF) == 0 3373 || ae->ae_flags == NULL 3374 || !flag_in_afflist(affile->af_flagtype, 3375 ae->ae_flags, affile->af_circumfix)))) { 3376 // Match. Remove the chop and add the affix. 
3377 if (xht == NULL) { 3378 // prefix: chop/add at the start of the word 3379 if (ae->ae_add == NULL) { 3380 *newword = NUL; 3381 } else { 3382 xstrlcpy(newword, ae->ae_add, MAXWLEN); 3383 } 3384 char *p = word; 3385 if (ae->ae_chop != NULL) { 3386 // Skip chop string. 3387 int i = mb_charlen(ae->ae_chop); 3388 for (; i > 0; i--) { 3389 MB_PTR_ADV(p); 3390 } 3391 } 3392 strcat(newword, p); 3393 } else { 3394 // suffix: chop/add at the end of the word 3395 xstrlcpy(newword, word, MAXWLEN); 3396 if (ae->ae_chop != NULL) { 3397 // Remove chop string. 3398 char *p = newword + strlen(newword); 3399 int i = mb_charlen(ae->ae_chop); 3400 for (; i > 0; i--) { 3401 MB_PTR_BACK(newword, p); 3402 } 3403 *p = NUL; 3404 } 3405 if (ae->ae_add != NULL) { 3406 strcat(newword, ae->ae_add); 3407 } 3408 } 3409 3410 int use_flags = flags; 3411 char *use_pfxlist = pfxlist; 3412 int use_pfxlen = pfxlen; 3413 bool need_affix = false; 3414 int use_condit = condit | CONDIT_COMB | CONDIT_AFF; 3415 if (ae->ae_flags != NULL) { 3416 // Extract flags from the affix list. 3417 use_flags |= get_affix_flags(affile, ae->ae_flags); 3418 3419 if (affile->af_needaffix != 0 3420 && flag_in_afflist(affile->af_flagtype, ae->ae_flags, 3421 affile->af_needaffix)) { 3422 need_affix = true; 3423 } 3424 3425 // When there is a CIRCUMFIX flag the other affix 3426 // must also have it and we don't add the word 3427 // with one affix. 3428 if (affile->af_circumfix != 0 3429 && flag_in_afflist(affile->af_flagtype, ae->ae_flags, 3430 affile->af_circumfix)) { 3431 use_condit |= CONDIT_CFIX; 3432 if ((condit & CONDIT_CFIX) == 0) { 3433 need_affix = true; 3434 } 3435 } 3436 3437 if (affile->af_pfxpostpone 3438 || spin->si_compflags != NULL) { 3439 if (affile->af_pfxpostpone) { 3440 // Get prefix IDS from the affix list. 3441 use_pfxlen = get_pfxlist(affile, ae->ae_flags, store_afflist); 3442 } else { 3443 use_pfxlen = 0; 3444 } 3445 use_pfxlist = store_afflist; 3446 3447 // Combine the prefix IDs. 
Avoid adding the 3448 // same ID twice. 3449 for (int i = 0; i < pfxlen; i++) { 3450 for (j = 0; j < use_pfxlen; j++) { 3451 if (pfxlist[i] == use_pfxlist[j]) { 3452 break; 3453 } 3454 } 3455 if (j == use_pfxlen) { 3456 use_pfxlist[use_pfxlen++] = pfxlist[i]; 3457 } 3458 } 3459 3460 if (spin->si_compflags != NULL) { 3461 // Get compound IDS from the affix list. 3462 get_compflags(affile, ae->ae_flags, 3463 use_pfxlist + use_pfxlen); 3464 } else { 3465 use_pfxlist[use_pfxlen] = NUL; 3466 } 3467 3468 // Combine the list of compound flags. 3469 // Concatenate them to the prefix IDs list. 3470 // Avoid adding the same ID twice. 3471 for (int i = pfxlen; pfxlist[i] != NUL; i++) { 3472 for (j = use_pfxlen; use_pfxlist[j] != NUL; j++) { 3473 if (pfxlist[i] == use_pfxlist[j]) { 3474 break; 3475 } 3476 } 3477 if (use_pfxlist[j] == NUL) { 3478 use_pfxlist[j++] = pfxlist[i]; 3479 use_pfxlist[j] = NUL; 3480 } 3481 } 3482 } 3483 } 3484 3485 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't 3486 // use the compound flags. 3487 if (use_pfxlist != NULL && ae->ae_compforbid) { 3488 xmemcpyz(pfx_pfxlist, use_pfxlist, (size_t)use_pfxlen); 3489 use_pfxlist = pfx_pfxlist; 3490 } 3491 3492 // When there are postponed prefixes... 3493 if (spin->si_prefroot != NULL 3494 && spin->si_prefroot->wn_sibling != NULL) { 3495 // ... add a flag to indicate an affix was used. 3496 use_flags |= WF_HAS_AFF; 3497 3498 // ... don't use a prefix list if combining 3499 // affixes is not allowed. But do use the 3500 // compound flags after them. 3501 if (!ah->ah_combine && use_pfxlist != NULL) { 3502 use_pfxlist += use_pfxlen; 3503 } 3504 } 3505 3506 // When compounding is supported and there is no 3507 // "COMPOUNDPERMITFLAG" then forbid compounding on the 3508 // side where the affix is applied. 
3509 if (spin->si_compflags != NULL && !ae->ae_comppermit) { 3510 if (xht != NULL) { 3511 use_flags |= WF_NOCOMPAFT; 3512 } else { 3513 use_flags |= WF_NOCOMPBEF; 3514 } 3515 } 3516 3517 // Store the modified word. 3518 if (store_word(spin, newword, use_flags, 3519 spin->si_region, use_pfxlist, 3520 need_affix) == FAIL) { 3521 retval = FAIL; 3522 } 3523 3524 // When added a prefix or a first suffix and the affix 3525 // has flags may add a(nother) suffix. RECURSIVE! 3526 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL) { 3527 if (store_aff_word(spin, newword, ae->ae_flags, 3528 affile, &affile->af_suff, xht, 3529 use_condit & (xht == NULL 3530 ? ~0 : ~CONDIT_SUF), 3531 use_flags, use_pfxlist, pfxlen) == FAIL) { 3532 retval = FAIL; 3533 } 3534 } 3535 3536 // When added a suffix and combining is allowed also 3537 // try adding a prefix additionally. Both for the 3538 // word flags and for the affix flags. RECURSIVE! 3539 if (xht != NULL && ah->ah_combine) { 3540 if (store_aff_word(spin, newword, 3541 afflist, affile, 3542 xht, NULL, use_condit, 3543 use_flags, use_pfxlist, 3544 pfxlen) == FAIL 3545 || (ae->ae_flags != NULL 3546 && store_aff_word(spin, newword, 3547 ae->ae_flags, affile, 3548 xht, NULL, use_condit, 3549 use_flags, use_pfxlist, 3550 pfxlen) == FAIL)) { 3551 retval = FAIL; 3552 } 3553 } 3554 } 3555 } 3556 } 3557 } 3558 } 3559 3560 return retval; 3561 } 3562 3563 // Read a file with a list of words. 3564 static int spell_read_wordfile(spellinfo_T *spin, char *fname) 3565 { 3566 linenr_T lnum = 0; 3567 char rline[MAXLINELEN]; 3568 char *line; 3569 char *pc = NULL; 3570 int retval = OK; 3571 bool did_word = false; 3572 int non_ascii = 0; 3573 3574 // Open the file. 3575 FILE *fd = os_fopen(fname, "r"); 3576 if (fd == NULL) { 3577 semsg(_(e_notopen), fname); 3578 return FAIL; 3579 } 3580 3581 vim_snprintf(IObuff, IOSIZE, _("Reading word file %s..."), fname); 3582 spell_message(spin, IObuff); 3583 3584 // Read all the lines in the file one by one. 
3585 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) { 3586 line_breakcheck(); 3587 lnum++; 3588 3589 // Skip comment lines. 3590 if (*rline == '#') { 3591 continue; 3592 } 3593 3594 // Remove CR, LF and white space from the end. 3595 int l = (int)strlen(rline); 3596 while (l > 0 && (uint8_t)rline[l - 1] <= ' ') { 3597 l--; 3598 } 3599 if (l == 0) { 3600 continue; // empty or blank line 3601 } 3602 rline[l] = NUL; 3603 3604 // Convert from "/encoding={encoding}" to 'encoding' when needed. 3605 xfree(pc); 3606 if (spin->si_conv.vc_type != CONV_NONE) { 3607 pc = string_convert(&spin->si_conv, rline, NULL); 3608 if (pc == NULL) { 3609 smsg(0, _("Conversion failure for word in %s line %" PRIdLINENR ": %s"), 3610 fname, lnum, rline); 3611 continue; 3612 } 3613 line = pc; 3614 } else { 3615 pc = NULL; 3616 line = rline; 3617 } 3618 3619 if (*line == '/') { 3620 line++; 3621 if (strncmp(line, "encoding=", 9) == 0) { 3622 if (spin->si_conv.vc_type != CONV_NONE) { 3623 smsg(0, _("Duplicate /encoding= line ignored in %s line %" PRIdLINENR ": %s"), 3624 fname, lnum, line - 1); 3625 } else if (did_word) { 3626 smsg(0, _("/encoding= line after word ignored in %s line %" PRIdLINENR ": %s"), 3627 fname, lnum, line - 1); 3628 } else { 3629 // Setup for conversion to 'encoding'. 
3630 line += 9; 3631 char *enc = enc_canonize(line); 3632 if (!spin->si_ascii 3633 && convert_setup(&spin->si_conv, enc, p_enc) == FAIL) { 3634 smsg(0, _("Conversion in %s not supported: from %s to %s"), 3635 fname, line, p_enc); 3636 } 3637 xfree(enc); 3638 spin->si_conv.vc_fail = true; 3639 } 3640 continue; 3641 } 3642 3643 if (strncmp(line, "regions=", 8) == 0) { 3644 if (spin->si_region_count > 1) { 3645 smsg(0, _("Duplicate /regions= line ignored in %s line %" PRIdLINENR ": %s"), 3646 fname, lnum, line); 3647 } else { 3648 line += 8; 3649 if (strlen(line) > MAXREGIONS * 2) { 3650 smsg(0, _("Too many regions in %s line %" PRIdLINENR ": %s"), 3651 fname, lnum, line); 3652 } else { 3653 spin->si_region_count = (int)strlen(line) / 2; 3654 STRCPY(spin->si_region_name, line); 3655 3656 // Adjust the mask for a word valid in all regions. 3657 spin->si_region = (1 << spin->si_region_count) - 1; 3658 } 3659 } 3660 continue; 3661 } 3662 3663 smsg(0, _("/ line ignored in %s line %" PRIdLINENR ": %s"), 3664 fname, lnum, line - 1); 3665 continue; 3666 } 3667 3668 int flags = 0; 3669 int regionmask = spin->si_region; 3670 3671 // Check for flags and region after a slash. 3672 char *p = vim_strchr(line, '/'); 3673 if (p != NULL) { 3674 *p++ = NUL; 3675 while (*p != NUL) { 3676 if (*p == '=') { // keep-case word 3677 flags |= WF_KEEPCAP | WF_FIXCAP; 3678 } else if (*p == '!') { // Bad, bad, wicked word. 3679 flags |= WF_BANNED; 3680 } else if (*p == '?') { // Rare word. 
3681 flags |= WF_RARE; 3682 } else if (ascii_isdigit((uint8_t)(*p))) { // region number(s) 3683 if ((flags & WF_REGION) == 0) { // first one 3684 regionmask = 0; 3685 } 3686 flags |= WF_REGION; 3687 3688 l = (uint8_t)(*p) - '0'; 3689 if (l == 0 || l > spin->si_region_count) { 3690 smsg(0, _("Invalid region nr in %s line %" PRIdLINENR ": %s"), 3691 fname, lnum, p); 3692 break; 3693 } 3694 regionmask |= 1 << (l - 1); 3695 } else { 3696 smsg(0, _("Unrecognized flags in %s line %" PRIdLINENR ": %s"), 3697 fname, lnum, p); 3698 break; 3699 } 3700 p++; 3701 } 3702 } 3703 3704 // Skip non-ASCII words when "spin->si_ascii" is true. 3705 if (spin->si_ascii && has_non_ascii(line)) { 3706 non_ascii++; 3707 continue; 3708 } 3709 3710 // Normal word: store it. 3711 if (store_word(spin, line, flags, regionmask, NULL, false) == FAIL) { 3712 retval = FAIL; 3713 break; 3714 } 3715 did_word = true; 3716 } 3717 3718 xfree(pc); 3719 fclose(fd); 3720 3721 if (spin->si_ascii && non_ascii > 0) { 3722 vim_snprintf(IObuff, IOSIZE, 3723 _("Ignored %d words with non-ASCII characters"), non_ascii); 3724 spell_message(spin, IObuff); 3725 } 3726 3727 return retval; 3728 } 3729 3730 /// Get part of an sblock_T, "len" bytes long. 3731 /// This avoids calling free() for every little struct we use (and keeping 3732 /// track of them). 3733 /// The memory is cleared to all zeros. 3734 /// 3735 /// @param len Length needed (<= SBLOCKSIZE). 3736 /// @param align Align for pointer. 3737 /// @return Pointer into block data. 3738 static void *getroom(spellinfo_T *spin, size_t len, bool align) 3739 FUNC_ATTR_NONNULL_RET 3740 { 3741 sblock_T *bl = spin->si_blocks; 3742 3743 assert(len <= SBLOCKSIZE); 3744 3745 if (align && bl != NULL) { 3746 // Round size up for alignment. On some systems structures need to be 3747 // aligned to the size of a pointer (e.g., SPARC). 
3748 bl->sb_used = (int)(((size_t)bl->sb_used + sizeof(char *) - 1) & ~(sizeof(char *) - 1)); 3749 } 3750 3751 if (bl == NULL || (size_t)bl->sb_used + len > SBLOCKSIZE) { 3752 // Allocate a block of memory. It is not freed until much later. 3753 bl = xcalloc(1, offsetof(sblock_T, sb_data) + SBLOCKSIZE + 1); 3754 bl->sb_next = spin->si_blocks; 3755 spin->si_blocks = bl; 3756 bl->sb_used = 0; 3757 spin->si_blocks_cnt++; 3758 } 3759 3760 char *p = bl->sb_data + bl->sb_used; 3761 bl->sb_used += (int)len; 3762 3763 return p; 3764 } 3765 3766 /// Make a copy of a string into memory allocated with getroom(). 3767 /// 3768 /// @return NULL when out of memory. 3769 static char *getroom_save(spellinfo_T *spin, char *s) 3770 { 3771 const size_t s_size = strlen(s) + 1; 3772 return memcpy(getroom(spin, s_size, false), s, s_size); 3773 } 3774 3775 // Free the list of allocated sblock_T. 3776 static void free_blocks(sblock_T *bl) 3777 { 3778 while (bl != NULL) { 3779 sblock_T *next = bl->sb_next; 3780 xfree(bl); 3781 bl = next; 3782 } 3783 } 3784 3785 // Allocate the root of a word tree. 3786 // Returns NULL when out of memory. 3787 static wordnode_T *wordtree_alloc(spellinfo_T *spin) 3788 FUNC_ATTR_NONNULL_RET 3789 { 3790 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); 3791 } 3792 3793 /// Return true if "word" contains valid word characters. 3794 /// Control characters and trailing '/' are invalid. Space is OK. 3795 static bool valid_spell_word(const char *word, const char *end) 3796 { 3797 if (!utf_valid_string(word, end)) { 3798 return false; 3799 } 3800 for (const char *p = word; *p != NUL && p < end; p += utfc_ptr2len(p)) { 3801 if ((uint8_t)(*p) < ' ' || (p[0] == '/' && p[1] == NUL)) { 3802 return false; 3803 } 3804 } 3805 return true; 3806 } 3807 3808 /// Store a word in the tree(s). 3809 /// Always store it in the case-folded tree. 
For a keep-case word this is 3810 /// useful when the word can also be used with all caps (no WF_FIXCAP flag) and 3811 /// used to find suggestions. 3812 /// For a keep-case word also store it in the keep-case tree. 3813 /// When "pfxlist" is not NULL store the word for each postponed prefix ID and 3814 /// compound flag. 3815 /// 3816 /// @param flags extra flags, wf_banned 3817 /// @param region supported region(s) 3818 /// @param pfxlist list of prefix ids or null 3819 /// @param need_affix only store word with affix id 3820 static int store_word(spellinfo_T *spin, char *word, int flags, int region, const char *pfxlist, 3821 bool need_affix) 3822 { 3823 int len = (int)strlen(word); 3824 int ct = captype(word, word + len); 3825 char foldword[MAXWLEN]; 3826 int res = OK; 3827 3828 // Avoid adding illegal bytes to the word tree. 3829 if (!valid_spell_word(word, word + len)) { 3830 return FAIL; 3831 } 3832 3833 spell_casefold(curwin, word, len, foldword, MAXWLEN); 3834 for (const char *p = pfxlist; res == OK; p++) { 3835 if (!need_affix || (p != NULL && *p != NUL)) { 3836 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, 3837 region, p == NULL ? 0 : *p); 3838 } 3839 if (p == NULL || *p == NUL) { 3840 break; 3841 } 3842 } 3843 spin->si_foldwcount++; 3844 3845 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) { 3846 for (const char *p = pfxlist; res == OK; p++) { 3847 if (!need_affix || (p != NULL && *p != NUL)) { 3848 res = tree_add_word(spin, word, spin->si_keeproot, flags, 3849 region, p == NULL ? 0 : *p); 3850 } 3851 if (p == NULL || *p == NUL) { 3852 break; 3853 } 3854 } 3855 spin->si_keepwcount++; 3856 } 3857 return res; 3858 } 3859 3860 // Add word "word" to a word tree at "root". 3861 // When "flags" < 0 we are adding to the prefix tree where "flags" is used for 3862 // "rare" and "region" is the condition nr. 3863 // Returns FAIL when out of memory. 
static int tree_add_word(spellinfo_T *spin, const char *word, wordnode_T *root, int flags,
                         int region, int affixID)
{
  // "node" is the sibling list being searched; "prev" is the pointer through
  // which that list is reached (a parent's wn_child or a sibling's
  // wn_sibling), so that a new or copied node can be linked in.
  wordnode_T *node = root;
  wordnode_T **prev = NULL;

  // Add each byte of the word to the tree, including the NUL at the end.
  for (int i = 0;; i++) {
    // When there is more than one reference to this node we need to make
    // a copy, so that we can modify it.  Copy the whole list of siblings
    // (we don't optimize for a partly shared list of siblings).
    if (node != NULL && node->wn_refs > 1) {
      node->wn_refs--;
      wordnode_T **copyprev = prev;
      for (wordnode_T *copyp = node; copyp != NULL; copyp = copyp->wn_sibling) {
        // Allocate a new node and copy the info.
        wordnode_T *np = get_wordnode(spin);
        if (np == NULL) {
          return FAIL;
        }
        np->wn_child = copyp->wn_child;
        if (np->wn_child != NULL) {
          np->wn_child->wn_refs++;  // child gets extra ref
        }
        np->wn_byte = copyp->wn_byte;
        // Only end-of-word nodes carry flags, region and affix ID.
        if (np->wn_byte == NUL) {
          np->wn_flags = copyp->wn_flags;
          np->wn_region = copyp->wn_region;
          np->wn_affixID = copyp->wn_affixID;
        }

        // Link the new node in the list, there will be one ref.
        np->wn_refs = 1;
        if (copyprev != NULL) {
          *copyprev = np;
        }
        copyprev = &np->wn_sibling;

        // Let "node" point to the head of the copied list.
        if (copyp == node) {
          node = np;
        }
      }
    }

    // Look for the sibling that has the same character.  They are sorted
    // on byte value, thus stop searching when a sibling is found with a
    // higher byte value.  For zero bytes (end of word) the sorting is
    // done on flags and then on affixID.
    // (With "flags" < 0 only the affixID is compared; with
    // "spin->si_sugtree" set the region is compared instead of the affixID.)
    while (node != NULL
           && (node->wn_byte < (uint8_t)word[i]
               || (node->wn_byte == NUL
                   && (flags < 0
                       ? node->wn_affixID < (unsigned)affixID
                       : (node->wn_flags < (unsigned)(flags & WN_MASK)
                          || (node->wn_flags == (flags & WN_MASK)
                              && (spin->si_sugtree
                                  ? (node->wn_region & 0xffff) < region
                                  : node->wn_affixID
                                  < (unsigned)affixID))))))) {
      prev = &node->wn_sibling;
      node = *prev;
    }
    // No usable node at this position: insert a new one in front of "node".
    // For the end-of-word byte an existing node is only reused when its flags
    // and affixID are equal, and never when "flags" < 0 or
    // "spin->si_sugtree" is set.
    if (node == NULL
        || node->wn_byte != (uint8_t)word[i]
        || (word[i] == NUL
            && (flags < 0
                || spin->si_sugtree
                || node->wn_flags != (flags & WN_MASK)
                || node->wn_affixID != affixID))) {
      // Allocate a new node.
      wordnode_T *np = get_wordnode(spin);
      if (np == NULL) {
        return FAIL;
      }
      np->wn_byte = (uint8_t)word[i];

      // If "node" is NULL this is a new child or the end of the sibling
      // list: ref count is one.  Otherwise use ref count of sibling and
      // make ref count of sibling one (matters when inserting in front
      // of the list of siblings).
      if (node == NULL) {
        np->wn_refs = 1;
      } else {
        np->wn_refs = node->wn_refs;
        node->wn_refs = 1;
      }
      if (prev != NULL) {
        *prev = np;
      }
      np->wn_sibling = node;
      node = np;
    }

    // End of the word: store the word info in the end node.  The region is
    // OR-ed in, so that regions of an already present identical word are kept.
    if (word[i] == NUL) {
      node->wn_flags = (uint16_t)flags;
      node->wn_region |= (int16_t)region;
      node->wn_affixID = (uint8_t)affixID;
      break;
    }
    // Continue with the children of this node for the next byte.
    prev = &node->wn_child;
    node = *prev;
  }
#ifdef SPELL_PRINTTREE
  smsg(0, "Added \"%s\"", word);
  spell_print_tree(root->wn_sibling);
#endif

  // count nr of words added since last message
  spin->si_msg_count++;

  if (spin->si_compress_cnt > 1) {
    if (--spin->si_compress_cnt == 1) {
      // Did enough words to lower the block count limit.
      spin->si_blocks_cnt += compress_inc;
    }
  }

  // When we have allocated lots of memory we need to compress the word tree
  // to free up some room.  But compression is slow, and we might actually
  // need that room, thus only compress in the following situations:
  // 1. When not compressed before (si_compress_cnt == 0): when using
  //    "compress_start" blocks.
  // 2. When compressed before and used "compress_inc" blocks before
  //    adding "compress_added" words (si_compress_cnt > 1).
  // 3. When compressed before, added "compress_added" words
  //    (si_compress_cnt == 1) and the number of free nodes drops below the
  //    maximum word length.
#ifndef SPELL_COMPRESS_ALWAYS
  if (spin->si_compress_cnt == 1
      ? spin->si_free_count < MAXWLEN
      : spin->si_blocks_cnt >= compress_start)
#endif
  {
    // Decrement the block counter.  The effect is that we compress again
    // when the freed up room has been used and another "compress_inc"
    // blocks have been allocated.  Unless "compress_added" words have
    // been added, then the limit is put back again.
    spin->si_blocks_cnt -= compress_inc;
    spin->si_compress_cnt = compress_added;

    if (spin->si_verbose) {
      msg_start();
      msg_puts(_(msg_compressing));
      msg_clr_eos();
      msg_didout = false;
      msg_col = 0;
      ui_flush();
    }

    // Compress both trees.  Either they both have many nodes, which makes
    // compression useful, or one of them is small, which means
    // compression goes fast.  But when filling the soundfold word tree
    // there is no keep-case tree.
    wordtree_compress(spin, spin->si_foldroot, "case-folded");
    if (affixID >= 0) {
      wordtree_compress(spin, spin->si_keeproot, "keep-case");
    }
  }

  return OK;
}

// Get a wordnode_T, either from the list of previously freed nodes or
// allocate a new one.
// Returns NULL when out of memory.
4030 static wordnode_T *get_wordnode(spellinfo_T *spin) 4031 { 4032 wordnode_T *n; 4033 4034 if (spin->si_first_free == NULL) { 4035 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), true); 4036 } else { 4037 n = spin->si_first_free; 4038 spin->si_first_free = n->wn_child; 4039 CLEAR_POINTER(n); 4040 spin->si_free_count--; 4041 } 4042 #ifdef SPELL_PRINTTREE 4043 if (n != NULL) { 4044 n->wn_nr = ++spin->si_wordnode_nr; 4045 } 4046 #endif 4047 return n; 4048 } 4049 4050 // Decrement the reference count on a node (which is the head of a list of 4051 // siblings). If the reference count becomes zero free the node and its 4052 // siblings. 4053 // Returns the number of nodes actually freed. 4054 static int deref_wordnode(spellinfo_T *spin, wordnode_T *node) 4055 FUNC_ATTR_NONNULL_ALL 4056 { 4057 int cnt = 0; 4058 4059 if (--node->wn_refs == 0) { 4060 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4061 if (np->wn_child != NULL) { 4062 cnt += deref_wordnode(spin, np->wn_child); 4063 } 4064 free_wordnode(spin, np); 4065 cnt++; 4066 } 4067 cnt++; // length field 4068 } 4069 return cnt; 4070 } 4071 4072 // Free a wordnode_T for re-use later. 4073 // Only the "wn_child" field becomes invalid. 4074 static void free_wordnode(spellinfo_T *spin, wordnode_T *n) 4075 FUNC_ATTR_NONNULL_ALL 4076 { 4077 n->wn_child = spin->si_first_free; 4078 spin->si_first_free = n; 4079 spin->si_free_count++; 4080 } 4081 4082 // Compress a tree: find tails that are identical and can be shared. 4083 static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, const char *name) 4084 FUNC_ATTR_NONNULL_ALL 4085 { 4086 hashtab_T ht; 4087 int tot = 0; 4088 long perc; 4089 4090 // Skip the root itself, it's not actually used. The first sibling is the 4091 // start of the tree. 
4092 if (root->wn_sibling == NULL) { 4093 return; 4094 } 4095 4096 hash_init(&ht); 4097 const int n = node_compress(spin, root->wn_sibling, &ht, &tot); 4098 4099 #ifndef SPELL_PRINTTREE 4100 if (spin->si_verbose || p_verbose > 2) 4101 #endif 4102 { 4103 if (tot > 1000000) { 4104 perc = (tot - n) / (tot / 100); 4105 } else if (tot == 0) { 4106 perc = 0; 4107 } else { 4108 perc = (tot - n) * 100 / tot; 4109 } 4110 vim_snprintf(IObuff, IOSIZE, 4111 _("Compressed %s: %d of %d nodes; %d (%ld%%) remaining"), 4112 name, n, tot, tot - n, perc); 4113 spell_message(spin, IObuff); 4114 } 4115 #ifdef SPELL_PRINTTREE 4116 spell_print_tree(root->wn_sibling); 4117 #endif 4118 hash_clear(&ht); 4119 } 4120 4121 /// Compress a node, its siblings and its children, depth first. 4122 /// Returns the number of compressed nodes. 4123 /// 4124 /// @param tot total count of nodes before compressing, incremented while going through the tree 4125 static int node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot) 4126 FUNC_ATTR_NONNULL_ALL 4127 { 4128 wordnode_T *tp; 4129 wordnode_T *child; 4130 int len = 0; 4131 unsigned n; 4132 int compressed = 0; 4133 4134 // Go through the list of siblings. Compress each child and then try 4135 // finding an identical child to replace it. 4136 // Note that with "child" we mean not just the node that is pointed to, 4137 // but the whole list of siblings of which the child node is the first. 4138 for (wordnode_T *np = node; np != NULL && !got_int; np = np->wn_sibling) { 4139 len++; 4140 if ((child = np->wn_child) != NULL) { 4141 // Compress the child first. This fills hashkey. 4142 compressed += node_compress(spin, child, ht, tot); 4143 4144 // Try to find an identical child. 
4145 hash_T hash = hash_hash((char *)child->wn_u1.hashkey); 4146 hashitem_T *hi = hash_lookup(ht, (const char *)child->wn_u1.hashkey, 4147 strlen((char *)child->wn_u1.hashkey), hash); 4148 if (!HASHITEM_EMPTY(hi)) { 4149 // There are children we encountered before with a hash value 4150 // identical to the current child. Now check if there is one 4151 // that is really identical. 4152 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) { 4153 if (node_equal(child, tp)) { 4154 // Found one! Now use that child in place of the 4155 // current one. This means the current child and all 4156 // its siblings is unlinked from the tree. 4157 tp->wn_refs++; 4158 compressed += deref_wordnode(spin, child); 4159 np->wn_child = tp; 4160 break; 4161 } 4162 } 4163 if (tp == NULL) { 4164 // No other child with this hash value equals the child of 4165 // the node, add it to the linked list after the first 4166 // item. 4167 tp = HI2WN(hi); 4168 child->wn_u2.next = tp->wn_u2.next; 4169 tp->wn_u2.next = child; 4170 } 4171 } else { 4172 // No other child has this hash value, add it to the 4173 // hashtable. 4174 hash_add_item(ht, hi, (char *)child->wn_u1.hashkey, hash); 4175 } 4176 } 4177 } 4178 *tot += len + 1; // add one for the node that stores the length 4179 4180 // Make a hash key for the node and its siblings, so that we can quickly 4181 // find a lookalike node. This must be done after compressing the sibling 4182 // list, otherwise the hash key would become invalid by the compression. 
4183 node->wn_u1.hashkey[0] = (uint8_t)len; 4184 unsigned nr = 0; 4185 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4186 if (np->wn_byte == NUL) { 4187 // end node: use wn_flags, wn_region and wn_affixID 4188 n = (unsigned)(np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16)); 4189 } else { 4190 // byte node: use the byte value and the child pointer 4191 n = (unsigned)(np->wn_byte + ((uintptr_t)np->wn_child << 8)); 4192 } 4193 nr = nr * 101 + n; 4194 } 4195 4196 // Avoid NUL bytes, it terminates the hash key. 4197 n = nr & 0xff; 4198 node->wn_u1.hashkey[1] = n == 0 ? 1 : (uint8_t)n; 4199 n = (nr >> 8) & 0xff; 4200 node->wn_u1.hashkey[2] = n == 0 ? 1 : (uint8_t)n; 4201 n = (nr >> 16) & 0xff; 4202 node->wn_u1.hashkey[3] = n == 0 ? 1 : (uint8_t)n; 4203 n = (nr >> 24) & 0xff; 4204 node->wn_u1.hashkey[4] = n == 0 ? 1 : (uint8_t)n; 4205 node->wn_u1.hashkey[5] = NUL; 4206 4207 // Check for CTRL-C pressed now and then. 4208 veryfast_breakcheck(); 4209 4210 return compressed; 4211 } 4212 4213 // Returns true when two nodes have identical siblings and children. 4214 static bool node_equal(wordnode_T *n1, wordnode_T *n2) 4215 { 4216 wordnode_T *p1; 4217 wordnode_T *p2; 4218 4219 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; 4220 p1 = p1->wn_sibling, p2 = p2->wn_sibling) { 4221 if (p1->wn_byte != p2->wn_byte 4222 || (p1->wn_byte == NUL 4223 ? (p1->wn_flags != p2->wn_flags 4224 || p1->wn_region != p2->wn_region 4225 || p1->wn_affixID != p2->wn_affixID) 4226 : (p1->wn_child != p2->wn_child))) { 4227 break; 4228 } 4229 } 4230 4231 return p1 == NULL && p2 == NULL; 4232 } 4233 4234 /// Function given to qsort() to sort the REP items on "from" string. 4235 static int rep_compare(const void *s1, const void *s2) 4236 { 4237 fromto_T *p1 = (fromto_T *)s1; 4238 fromto_T *p2 = (fromto_T *)s2; 4239 4240 return strcmp(p1->ft_from, p2->ft_from); 4241 } 4242 4243 /// Write the Vim .spl file "fname". 4244 /// 4245 /// @return OK/FAIL. 
4246 static int write_vim_spell(spellinfo_T *spin, char *fname) 4247 { 4248 int retval = OK; 4249 int regionmask; 4250 4251 FILE *fd = os_fopen(fname, "w"); 4252 if (fd == NULL) { 4253 semsg(_(e_notopen), fname); 4254 return FAIL; 4255 } 4256 4257 // <HEADER>: <fileID> <versionnr> 4258 // <fileID> 4259 size_t fwv = fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, 1, fd); 4260 if (fwv != 1) { 4261 // Catch first write error, don't try writing more. 4262 goto theend; 4263 } 4264 4265 putc(VIMSPELLVERSION, fd); // <versionnr> 4266 4267 // <SECTIONS>: <section> ... <sectionend> 4268 4269 // SN_INFO: <infotext> 4270 if (spin->si_info != NULL) { 4271 putc(SN_INFO, fd); // <sectionID> 4272 putc(0, fd); // <sectionflags> 4273 size_t i = strlen(spin->si_info); 4274 put_bytes(fd, i, 4); // <sectionlen> 4275 fwv &= fwrite(spin->si_info, i, 1, fd); // <infotext> 4276 } 4277 4278 // SN_REGION: <regionname> ... 4279 // Write the region names only if there is more than one. 4280 if (spin->si_region_count > 1) { 4281 putc(SN_REGION, fd); // <sectionID> 4282 putc(SNF_REQUIRED, fd); // <sectionflags> 4283 size_t l = (size_t)spin->si_region_count * 2; 4284 put_bytes(fd, l, 4); // <sectionlen> 4285 fwv &= fwrite(spin->si_region_name, l, 1, fd); 4286 // <regionname> ... 4287 regionmask = (1 << spin->si_region_count) - 1; 4288 } else { 4289 regionmask = 0; 4290 } 4291 4292 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> 4293 // 4294 // The table with character flags and the table for case folding. 4295 // This makes sure the same characters are recognized as word characters 4296 // when generating and when using a spell file. 4297 // Skip this for ASCII, the table may conflict with the one used for 4298 // 'encoding'. 4299 // Also skip this for an .add.spl file, the main spell file must contain 4300 // the table (avoids that it conflicts). File is shorter too. 
4301 if (!spin->si_ascii && !spin->si_add) { 4302 char folchars[128 * 8]; 4303 4304 putc(SN_CHARFLAGS, fd); // <sectionID> 4305 putc(SNF_REQUIRED, fd); // <sectionflags> 4306 4307 // Form the <folchars> string first, we need to know its length. 4308 size_t l = 0; 4309 for (size_t i = 128; i < 256; i++) { 4310 l += (size_t)utf_char2bytes(spelltab.st_fold[i], folchars + l); 4311 } 4312 put_bytes(fd, 1 + 128 + 2 + l, 4); // <sectionlen> 4313 4314 fputc(128, fd); // <charflagslen> 4315 for (size_t i = 128; i < 256; i++) { 4316 int flags = 0; 4317 if (spelltab.st_isw[i]) { 4318 flags |= CF_WORD; 4319 } 4320 if (spelltab.st_isu[i]) { 4321 flags |= CF_UPPER; 4322 } 4323 fputc(flags, fd); // <charflags> 4324 } 4325 4326 put_bytes(fd, l, 2); // <folcharslen> 4327 fwv &= fwrite(folchars, l, 1, fd); // <folchars> 4328 } 4329 4330 // SN_MIDWORD: <midword> 4331 if (spin->si_midword != NULL) { 4332 putc(SN_MIDWORD, fd); // <sectionID> 4333 putc(SNF_REQUIRED, fd); // <sectionflags> 4334 4335 size_t i = strlen(spin->si_midword); 4336 put_bytes(fd, i, 4); // <sectionlen> 4337 fwv &= fwrite(spin->si_midword, i, 1, fd); 4338 // <midword> 4339 } 4340 4341 // SN_PREFCOND: <prefcondcnt> <prefcond> ... 4342 if (!GA_EMPTY(&spin->si_prefcond)) { 4343 putc(SN_PREFCOND, fd); // <sectionID> 4344 putc(SNF_REQUIRED, fd); // <sectionflags> 4345 4346 size_t l = (size_t)write_spell_prefcond(NULL, &spin->si_prefcond, &fwv); 4347 put_bytes(fd, l, 4); // <sectionlen> 4348 4349 write_spell_prefcond(fd, &spin->si_prefcond, &fwv); 4350 } 4351 4352 // SN_REP: <repcount> <rep> ... 4353 // SN_SAL: <salflags> <salcount> <sal> ... 4354 // SN_REPSAL: <repcount> <rep> ... 
4355 4356 // round 1: SN_REP section 4357 // round 2: SN_SAL section (unless SN_SOFO is used) 4358 // round 3: SN_REPSAL section 4359 for (unsigned round = 1; round <= 3; round++) { 4360 garray_T *gap; 4361 if (round == 1) { 4362 gap = &spin->si_rep; 4363 } else if (round == 2) { 4364 // Don't write SN_SAL when using a SN_SOFO section 4365 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) { 4366 continue; 4367 } 4368 gap = &spin->si_sal; 4369 } else { 4370 gap = &spin->si_repsal; 4371 } 4372 4373 // Don't write the section if there are no items. 4374 if (GA_EMPTY(gap)) { 4375 continue; 4376 } 4377 4378 // Sort the REP/REPSAL items. 4379 if (round != 2) { 4380 qsort(gap->ga_data, (size_t)gap->ga_len, 4381 sizeof(fromto_T), rep_compare); 4382 } 4383 4384 int sect_id = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); 4385 putc(sect_id, fd); // <sectionID> 4386 4387 // This is for making suggestions, section is not required. 4388 putc(0, fd); // <sectionflags> 4389 4390 // Compute the length of what follows. 
4391 size_t l = 2; // count <repcount> or <salcount> 4392 assert(gap->ga_len >= 0); 4393 for (size_t i = 0; i < (size_t)gap->ga_len; i++) { 4394 fromto_T *ftp = &((fromto_T *)gap->ga_data)[i]; 4395 l += 1 + strlen(ftp->ft_from); // count <*fromlen> and <*from> 4396 l += 1 + strlen(ftp->ft_to); // count <*tolen> and <*to> 4397 } 4398 if (round == 2) { 4399 l++; // count <salflags> 4400 } 4401 put_bytes(fd, l, 4); // <sectionlen> 4402 4403 if (round == 2) { 4404 int i = 0; 4405 if (spin->si_followup) { 4406 i |= SAL_F0LLOWUP; 4407 } 4408 if (spin->si_collapse) { 4409 i |= SAL_COLLAPSE; 4410 } 4411 if (spin->si_rem_accents) { 4412 i |= SAL_REM_ACCENTS; 4413 } 4414 putc(i, fd); // <salflags> 4415 } 4416 4417 put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <repcount> or <salcount> 4418 for (size_t i = 0; i < (size_t)gap->ga_len; i++) { 4419 // <rep> : <repfromlen> <repfrom> <reptolen> <repto> 4420 // <sal> : <salfromlen> <salfrom> <saltolen> <salto> 4421 fromto_T *ftp = &((fromto_T *)gap->ga_data)[i]; 4422 for (unsigned rr = 1; rr <= 2; rr++) { 4423 char *p = rr == 1 ? ftp->ft_from : ftp->ft_to; 4424 l = strlen(p); 4425 assert(l < INT_MAX); 4426 putc((int)l, fd); 4427 if (l > 0) { 4428 fwv &= fwrite(p, l, 1, fd); 4429 } 4430 } 4431 } 4432 } 4433 4434 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> 4435 // This is for making suggestions, section is not required. 4436 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) { 4437 putc(SN_SOFO, fd); // <sectionID> 4438 putc(0, fd); // <sectionflags> 4439 4440 size_t l = strlen(spin->si_sofofr); 4441 put_bytes(fd, l + strlen(spin->si_sofoto) + 4, 4); // <sectionlen> 4442 4443 put_bytes(fd, l, 2); // <sofofromlen> 4444 fwv &= fwrite(spin->si_sofofr, l, 1, fd); // <sofofrom> 4445 4446 l = strlen(spin->si_sofoto); 4447 put_bytes(fd, l, 2); // <sofotolen> 4448 fwv &= fwrite(spin->si_sofoto, l, 1, fd); // <sofoto> 4449 } 4450 4451 // SN_WORDS: <word> ... 4452 // This is for making suggestions, section is not required. 
4453 if (spin->si_commonwords.ht_used > 0) { 4454 putc(SN_WORDS, fd); // <sectionID> 4455 putc(0, fd); // <sectionflags> 4456 4457 // round 1: count the bytes 4458 // round 2: write the bytes 4459 for (unsigned round = 1; round <= 2; round++) { 4460 size_t todo; 4461 size_t len = 0; 4462 hashitem_T *hi; 4463 4464 todo = spin->si_commonwords.ht_used; 4465 for (hi = spin->si_commonwords.ht_array; todo > 0; hi++) { 4466 if (!HASHITEM_EMPTY(hi)) { 4467 size_t l = strlen(hi->hi_key) + 1; 4468 len += l; 4469 if (round == 2) { // <word> 4470 fwv &= fwrite(hi->hi_key, l, 1, fd); 4471 } 4472 todo--; 4473 } 4474 } 4475 if (round == 1) { 4476 put_bytes(fd, len, 4); // <sectionlen> 4477 } 4478 } 4479 } 4480 4481 // SN_MAP: <mapstr> 4482 // This is for making suggestions, section is not required. 4483 if (!GA_EMPTY(&spin->si_map)) { 4484 putc(SN_MAP, fd); // <sectionID> 4485 putc(0, fd); // <sectionflags> 4486 size_t l = (size_t)spin->si_map.ga_len; 4487 put_bytes(fd, l, 4); // <sectionlen> 4488 fwv &= fwrite(spin->si_map.ga_data, l, 1, fd); // <mapstr> 4489 } 4490 4491 // SN_SUGFILE: <timestamp> 4492 // This is used to notify that a .sug file may be available and at the 4493 // same time allows for checking that a .sug file that is found matches 4494 // with this .spl file. That's because the word numbers must be exactly 4495 // right. 4496 if (!spin->si_nosugfile 4497 && (!GA_EMPTY(&spin->si_sal) 4498 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) { 4499 putc(SN_SUGFILE, fd); // <sectionID> 4500 putc(0, fd); // <sectionflags> 4501 put_bytes(fd, 8, 4); // <sectionlen> 4502 4503 // Set si_sugtime and write it to the file. 4504 spin->si_sugtime = time(NULL); 4505 put_time(fd, spin->si_sugtime); // <timestamp> 4506 } 4507 4508 // SN_NOSPLITSUGS: nothing 4509 // This is used to notify that no suggestions with word splits are to be 4510 // made. 
4511 if (spin->si_nosplitsugs) { 4512 putc(SN_NOSPLITSUGS, fd); // <sectionID> 4513 putc(0, fd); // <sectionflags> 4514 put_bytes(fd, 0, 4); // <sectionlen> 4515 } 4516 4517 // SN_NOCOMPUNDSUGS: nothing 4518 // This is used to notify that no suggestions with compounds are to be 4519 // made. 4520 if (spin->si_nocompoundsugs) { 4521 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID> 4522 putc(0, fd); // <sectionflags> 4523 put_bytes(fd, 0, 4); // <sectionlen> 4524 } 4525 4526 // SN_COMPOUND: compound info. 4527 // We don't mark it required, when not supported all compound words will 4528 // be bad words. 4529 if (spin->si_compflags != NULL) { 4530 putc(SN_COMPOUND, fd); // <sectionID> 4531 putc(0, fd); // <sectionflags> 4532 4533 size_t l = strlen(spin->si_compflags); 4534 assert(spin->si_comppat.ga_len >= 0); 4535 for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; i++) { 4536 l += strlen(((char **)(spin->si_comppat.ga_data))[i]) + 1; 4537 } 4538 put_bytes(fd, l + 7, 4); // <sectionlen> 4539 4540 putc(spin->si_compmax, fd); // <compmax> 4541 putc(spin->si_compminlen, fd); // <compminlen> 4542 putc(spin->si_compsylmax, fd); // <compsylmax> 4543 putc(0, fd); // for Vim 7.0b compatibility 4544 putc(spin->si_compoptions, fd); // <compoptions> 4545 put_bytes(fd, (uintmax_t)spin->si_comppat.ga_len, 2); // <comppatcount> 4546 for (size_t i = 0; i < (size_t)spin->si_comppat.ga_len; i++) { 4547 char *p = ((char **)(spin->si_comppat.ga_data))[i]; 4548 assert(strlen(p) < INT_MAX); 4549 putc((int)strlen(p), fd); // <comppatlen> 4550 fwv &= fwrite(p, strlen(p), 1, fd); // <comppattext> 4551 } 4552 // <compflags> 4553 fwv &= fwrite(spin->si_compflags, strlen(spin->si_compflags), 1, fd); 4554 } 4555 4556 // SN_NOBREAK: NOBREAK flag 4557 if (spin->si_nobreak) { 4558 putc(SN_NOBREAK, fd); // <sectionID> 4559 putc(0, fd); // <sectionflags> 4560 4561 // It's empty, the presence of the section flags the feature. 
4562 put_bytes(fd, 0, 4); // <sectionlen> 4563 } 4564 4565 // SN_SYLLABLE: syllable info. 4566 // We don't mark it required, when not supported syllables will not be 4567 // counted. 4568 if (spin->si_syllable != NULL) { 4569 putc(SN_SYLLABLE, fd); // <sectionID> 4570 putc(0, fd); // <sectionflags> 4571 4572 size_t l = strlen(spin->si_syllable); 4573 put_bytes(fd, l, 4); // <sectionlen> 4574 fwv &= fwrite(spin->si_syllable, l, 1, fd); // <syllable> 4575 } 4576 4577 // end of <SECTIONS> 4578 putc(SN_END, fd); // <sectionend> 4579 4580 // <LWORDTREE> <KWORDTREE> <PREFIXTREE> 4581 spin->si_memtot = 0; 4582 for (unsigned round = 1; round <= 3; round++) { 4583 wordnode_T *tree; 4584 if (round == 1) { 4585 tree = spin->si_foldroot->wn_sibling; 4586 } else if (round == 2) { 4587 tree = spin->si_keeproot->wn_sibling; 4588 } else { 4589 tree = spin->si_prefroot->wn_sibling; 4590 } 4591 4592 // Clear the index and wnode fields in the tree. 4593 clear_node(tree); 4594 4595 // Count the number of nodes. Needed to be able to allocate the 4596 // memory when reading the nodes. Also fills in index for shared 4597 // nodes. 4598 size_t nodecount = (size_t)put_node(NULL, tree, 0, regionmask, round == 3); 4599 4600 // number of nodes in 4 bytes 4601 put_bytes(fd, nodecount, 4); // <nodecount> 4602 assert(nodecount + nodecount * sizeof(int) < INT_MAX); 4603 spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); 4604 4605 // Write the nodes. 4606 put_node(fd, tree, 0, regionmask, round == 3); 4607 } 4608 4609 // Write another byte to check for errors (file system full). 4610 if (putc(0, fd) == EOF) { 4611 retval = FAIL; 4612 } 4613 theend: 4614 if (fclose(fd) == EOF) { 4615 retval = FAIL; 4616 } 4617 4618 if (fwv != 1) { 4619 retval = FAIL; 4620 } 4621 if (retval == FAIL) { 4622 emsg(_(e_write)); 4623 } 4624 4625 return retval; 4626 } 4627 4628 // Clear the index and wnode fields of "node", it siblings and its 4629 // children. 
This is needed because they are a union with other items to save 4630 // space. 4631 static void clear_node(wordnode_T *node) 4632 { 4633 if (node != NULL) { 4634 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4635 np->wn_u1.index = 0; 4636 np->wn_u2.wnode = NULL; 4637 4638 if (np->wn_byte != NUL) { 4639 clear_node(np->wn_child); 4640 } 4641 } 4642 } 4643 } 4644 4645 /// Dump a word tree at node "node". 4646 /// 4647 /// This first writes the list of possible bytes (siblings). Then for each 4648 /// byte recursively write the children. 4649 /// 4650 /// NOTE: The code here must match the code in read_tree_node(), since 4651 /// assumptions are made about the indexes (so that we don't have to write them 4652 /// in the file). 4653 /// 4654 /// @param fd NULL when only counting 4655 /// @param prefixtree true for PREFIXTREE 4656 /// 4657 /// @return the number of nodes used. 4658 static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, bool prefixtree) 4659 { 4660 // If "node" is zero the tree is empty. 4661 if (node == NULL) { 4662 return 0; 4663 } 4664 4665 // Store the index where this node is written. 4666 node->wn_u1.index = idx; 4667 4668 // Count the number of siblings. 4669 int siblingcount = 0; 4670 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4671 siblingcount++; 4672 } 4673 4674 // Write the sibling count. 4675 if (fd != NULL) { 4676 putc(siblingcount, fd); // <siblingcount> 4677 } 4678 // Write each sibling byte and optionally extra info. 4679 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4680 if (np->wn_byte == 0) { 4681 if (fd != NULL) { 4682 // For a NUL byte (end of word) write the flags etc. 4683 if (prefixtree) { 4684 // In PREFIXTREE write the required affixID and the 4685 // associated condition nr (stored in wn_region). 
The 4686 // byte value is misused to store the "rare" and "not 4687 // combining" flags 4688 if (np->wn_flags == (uint16_t)PFX_FLAGS) { 4689 putc(BY_NOFLAGS, fd); // <byte> 4690 } else { 4691 putc(BY_FLAGS, fd); // <byte> 4692 putc(np->wn_flags, fd); // <pflags> 4693 } 4694 putc(np->wn_affixID, fd); // <affixID> 4695 put_bytes(fd, (uintmax_t)np->wn_region, 2); // <prefcondnr> 4696 } else { 4697 // For word trees we write the flag/region items. 4698 int flags = np->wn_flags; 4699 if (regionmask != 0 && np->wn_region != regionmask) { 4700 flags |= WF_REGION; 4701 } 4702 if (np->wn_affixID != 0) { 4703 flags |= WF_AFX; 4704 } 4705 if (flags == 0) { 4706 // word without flags or region 4707 putc(BY_NOFLAGS, fd); // <byte> 4708 } else { 4709 if (np->wn_flags >= 0x100) { 4710 putc(BY_FLAGS2, fd); // <byte> 4711 putc(flags, fd); // <flags> 4712 putc((int)((unsigned)flags >> 8), fd); // <flags2> 4713 } else { 4714 putc(BY_FLAGS, fd); // <byte> 4715 putc(flags, fd); // <flags> 4716 } 4717 if (flags & WF_REGION) { 4718 putc(np->wn_region, fd); // <region> 4719 } 4720 if (flags & WF_AFX) { 4721 putc(np->wn_affixID, fd); // <affixID> 4722 } 4723 } 4724 } 4725 } 4726 } else { 4727 if (np->wn_child->wn_u1.index != 0 4728 && np->wn_child->wn_u2.wnode != node) { 4729 // The child is written elsewhere, write the reference. 4730 if (fd != NULL) { 4731 putc(BY_INDEX, fd); // <byte> 4732 put_bytes(fd, (uintmax_t)np->wn_child->wn_u1.index, 3); // <nodeidx> 4733 } 4734 } else if (np->wn_child->wn_u2.wnode == NULL) { 4735 // We will write the child below and give it an index. 4736 np->wn_child->wn_u2.wnode = node; 4737 } 4738 4739 if (fd != NULL) { 4740 if (putc(np->wn_byte, fd) == EOF) { // <byte> or <xbyte> 4741 emsg(_(e_write)); 4742 return 0; 4743 } 4744 } 4745 } 4746 } 4747 4748 // Space used in the array when reading: one for each sibling and one for 4749 // the count. 4750 int newindex = idx + siblingcount + 1; 4751 4752 // Recursively dump the children of each sibling. 
4753 for (wordnode_T *np = node; np != NULL; np = np->wn_sibling) { 4754 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) { 4755 newindex = put_node(fd, np->wn_child, newindex, regionmask, 4756 prefixtree); 4757 } 4758 } 4759 4760 return newindex; 4761 } 4762 4763 // ":mkspell [-ascii] outfile infile ..." 4764 // ":mkspell [-ascii] addfile" 4765 void ex_mkspell(exarg_T *eap) 4766 { 4767 int fcount; 4768 char **fnames; 4769 char *arg = eap->arg; 4770 bool ascii = false; 4771 4772 if (strncmp(arg, "-ascii", 6) == 0) { 4773 ascii = true; 4774 arg = skipwhite(arg + 6); 4775 } 4776 4777 // Expand all the remaining arguments (e.g., $VIMRUNTIME). 4778 if (get_arglist_exp(arg, &fcount, &fnames, false) != OK) { 4779 return; 4780 } 4781 4782 mkspell(fcount, fnames, ascii, eap->forceit, false); 4783 FreeWild(fcount, fnames); 4784 } 4785 4786 // Create the .sug file. 4787 // Uses the soundfold info in "spin". 4788 // Writes the file with the name "wfname", with ".spl" changed to ".sug". 4789 static void spell_make_sugfile(spellinfo_T *spin, char *wfname) 4790 { 4791 char *fname = NULL; 4792 slang_T *slang; 4793 bool free_slang = false; 4794 4795 // Read back the .spl file that was written. This fills the required 4796 // info for soundfolding. This also uses less memory than the 4797 // pointer-linked version of the trie. And it avoids having two versions 4798 // of the code for the soundfolding stuff. 4799 // It might have been done already by spell_reload_one(). 4800 for (slang = first_lang; slang != NULL; slang = slang->sl_next) { 4801 if (path_full_compare(wfname, slang->sl_fname, false, true) 4802 == kEqualFiles) { 4803 break; 4804 } 4805 } 4806 if (slang == NULL) { 4807 spell_message(spin, _("Reading back spell file...")); 4808 slang = spell_load_file(wfname, NULL, NULL, false); 4809 if (slang == NULL) { 4810 return; 4811 } 4812 free_slang = true; 4813 } 4814 4815 // Clear the info in "spin" that is used. 
4816 spin->si_blocks = NULL; 4817 spin->si_blocks_cnt = 0; 4818 spin->si_compress_cnt = 0; // will stay at 0 all the time 4819 spin->si_free_count = 0; 4820 spin->si_first_free = NULL; 4821 spin->si_foldwcount = 0; 4822 4823 // Go through the trie of good words, soundfold each word and add it to 4824 // the soundfold trie. 4825 spell_message(spin, _("Performing soundfolding...")); 4826 if (sug_filltree(spin, slang) == FAIL) { 4827 goto theend; 4828 } 4829 4830 // Create the table which links each soundfold word with a list of the 4831 // good words it may come from. Creates buffer "spin->si_spellbuf". 4832 // This also removes the wordnr from the NUL byte entries to make 4833 // compression possible. 4834 if (sug_maketable(spin) == FAIL) { 4835 goto theend; 4836 } 4837 4838 smsg(0, _("Number of words after soundfolding: %" PRId64), 4839 (int64_t)spin->si_spellbuf->b_ml.ml_line_count); 4840 4841 // Compress the soundfold trie. 4842 spell_message(spin, _(msg_compressing)); 4843 wordtree_compress(spin, spin->si_foldroot, "case-folded"); 4844 4845 // Write the .sug file. 4846 // Make the file name by changing ".spl" to ".sug". 4847 fname = xmalloc(MAXPATHL); 4848 xstrlcpy(fname, wfname, MAXPATHL); 4849 int len = (int)strlen(fname); 4850 fname[len - 2] = 'u'; 4851 fname[len - 1] = 'g'; 4852 sug_write(spin, fname); 4853 4854 theend: 4855 xfree(fname); 4856 if (free_slang) { 4857 slang_free(slang); 4858 } 4859 free_blocks(spin->si_blocks); 4860 close_spellbuf(spin->si_spellbuf); 4861 } 4862 4863 // Build the soundfold trie for language "slang". 4864 static int sug_filltree(spellinfo_T *spin, slang_T *slang) 4865 { 4866 idx_T arridx[MAXWLEN]; 4867 int curi[MAXWLEN]; 4868 char tword[MAXWLEN]; 4869 char tsalword[MAXWLEN]; 4870 unsigned words_done = 0; 4871 int wordcount[MAXWLEN]; 4872 4873 // We use si_foldroot for the soundfolded trie. 
4874 spin->si_foldroot = wordtree_alloc(spin); 4875 4876 // Let tree_add_word() know we're adding to the soundfolded tree 4877 spin->si_sugtree = true; 4878 4879 // Go through the whole case-folded tree, soundfold each word and put it 4880 // in the trie. Bail out if the tree is empty. 4881 uint8_t *byts = slang->sl_fbyts; 4882 idx_T *idxs = slang->sl_fidxs; 4883 if (byts == NULL || idxs == NULL) { 4884 return FAIL; 4885 } 4886 4887 arridx[0] = 0; 4888 curi[0] = 1; 4889 wordcount[0] = 0; 4890 4891 int depth = 0; 4892 while (depth >= 0 && !got_int) { 4893 if (curi[depth] > byts[arridx[depth]]) { 4894 // Done all bytes at this node, go up one level. 4895 idxs[arridx[depth]] = wordcount[depth]; 4896 if (depth > 0) { 4897 wordcount[depth - 1] += wordcount[depth]; 4898 } 4899 4900 depth--; 4901 line_breakcheck(); 4902 } else { 4903 // Do one more byte at this node. 4904 idx_T n = arridx[depth] + curi[depth]; 4905 curi[depth]++; 4906 4907 int c = byts[n]; 4908 if (c == 0) { 4909 // Sound-fold the word. 4910 tword[depth] = NUL; 4911 spell_soundfold(slang, tword, true, tsalword); 4912 4913 // We use the "flags" field for the MSB of the wordnr, 4914 // "region" for the LSB of the wordnr. 4915 if (tree_add_word(spin, tsalword, spin->si_foldroot, 4916 (int)(words_done >> 16), words_done & 0xffff, 4917 0) == FAIL) { 4918 return FAIL; 4919 } 4920 4921 words_done++; 4922 wordcount[depth]++; 4923 4924 // Reset the block count each time to avoid compression 4925 // kicking in. 4926 spin->si_blocks_cnt = 0; 4927 4928 // Skip over any other NUL bytes (same word with different 4929 // flags). But don't go over the end. 4930 while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0) { 4931 n++; 4932 curi[depth]++; 4933 } 4934 } else { 4935 // Normal char, go one level deeper. 
4936 tword[depth++] = (char)(uint8_t)c; 4937 arridx[depth] = idxs[n]; 4938 curi[depth] = 1; 4939 wordcount[depth] = 0; 4940 } 4941 } 4942 } 4943 4944 smsg(0, _("Total number of words: %d"), words_done); 4945 4946 return OK; 4947 } 4948 4949 // Make the table that links each word in the soundfold trie to the words it 4950 // can be produced from. 4951 // This is not unlike lines in a file, thus use a memfile to be able to access 4952 // the table efficiently. 4953 // Returns FAIL when out of memory. 4954 static int sug_maketable(spellinfo_T *spin) 4955 { 4956 garray_T ga; 4957 int res = OK; 4958 4959 // Allocate a buffer, open a memline for it and create the swap file 4960 // (uses a temp file, not a .swp file). 4961 spin->si_spellbuf = open_spellbuf(); 4962 4963 // Use a buffer to store the line info, avoids allocating many small 4964 // pieces of memory. 4965 ga_init(&ga, 1, 100); 4966 4967 // recursively go through the tree 4968 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) { 4969 res = FAIL; 4970 } 4971 4972 ga_clear(&ga); 4973 return res; 4974 } 4975 4976 /// Fill the table for one node and its children. 4977 /// Returns the wordnr at the start of the node. 4978 /// Returns -1 when out of memory. 4979 /// 4980 /// @param gap place to store line of numbers 4981 static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap) 4982 { 4983 int wordnr = startwordnr; 4984 4985 for (wordnode_T *p = node; p != NULL; p = p->wn_sibling) { 4986 if (p->wn_byte == NUL) { 4987 gap->ga_len = 0; 4988 int prev_nr = 0; 4989 for (wordnode_T *np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) { 4990 ga_grow(gap, 10); 4991 4992 int nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); 4993 // Compute the offset from the previous nr and store the 4994 // offset in a way that it takes a minimum number of bytes. 4995 // It's a bit like utf-8, but without the need to mark 4996 // following bytes. 
4997 nr -= prev_nr; 4998 prev_nr += nr; 4999 gap->ga_len += offset2bytes(nr, (char *)gap->ga_data + gap->ga_len); 5000 } 5001 5002 // add the NUL byte 5003 ((char *)gap->ga_data)[gap->ga_len++] = NUL; 5004 5005 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, 5006 gap->ga_data, gap->ga_len, true) == FAIL) { 5007 return -1; 5008 } 5009 wordnr++; 5010 5011 // Remove extra NUL entries, we no longer need them. We don't 5012 // bother freeing the nodes, they won't be reused anyway. 5013 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) { 5014 p->wn_sibling = p->wn_sibling->wn_sibling; 5015 } 5016 5017 // Clear the flags on the remaining NUL node, so that compression 5018 // works a lot better. 5019 p->wn_flags = 0; 5020 p->wn_region = 0; 5021 } else { 5022 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); 5023 if (wordnr == -1) { 5024 return -1; 5025 } 5026 } 5027 } 5028 return wordnr; 5029 } 5030 5031 // Convert an offset into a minimal number of bytes. 5032 // Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL 5033 // bytes. 5034 static int offset2bytes(int nr, char *buf_in) 5035 { 5036 uint8_t *buf = (uint8_t *)buf_in; 5037 5038 // Split the number in parts of base 255. We need to avoid NUL bytes. 
5039 int b1 = nr % 255 + 1; 5040 int rem = nr / 255; 5041 int b2 = rem % 255 + 1; 5042 rem = rem / 255; 5043 int b3 = rem % 255 + 1; 5044 int b4 = rem / 255 + 1; 5045 5046 if (b4 > 1 || b3 > 0x1f) { // 4 bytes 5047 buf[0] = (uint8_t)(0xe0 + b4); 5048 buf[1] = (uint8_t)b3; 5049 buf[2] = (uint8_t)b2; 5050 buf[3] = (uint8_t)b1; 5051 return 4; 5052 } 5053 if (b3 > 1 || b2 > 0x3f) { // 3 bytes 5054 buf[0] = (uint8_t)(0xc0 + b3); 5055 buf[1] = (uint8_t)b2; 5056 buf[2] = (uint8_t)b1; 5057 return 3; 5058 } 5059 if (b2 > 1 || b1 > 0x7f) { // 2 bytes 5060 buf[0] = (uint8_t)(0x80 + b2); 5061 buf[1] = (uint8_t)b1; 5062 return 2; 5063 } 5064 // 1 byte 5065 buf[0] = (uint8_t)b1; 5066 return 1; 5067 } 5068 5069 // Write the .sug file in "fname". 5070 static void sug_write(spellinfo_T *spin, char *fname) 5071 { 5072 // Create the file. Note that an existing file is silently overwritten! 5073 FILE *fd = os_fopen(fname, "w"); 5074 if (fd == NULL) { 5075 semsg(_(e_notopen), fname); 5076 return; 5077 } 5078 5079 vim_snprintf(IObuff, IOSIZE, 5080 _("Writing suggestion file %s..."), fname); 5081 spell_message(spin, IObuff); 5082 5083 // <SUGHEADER>: <fileID> <versionnr> <timestamp> 5084 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, 1, fd) != 1) { // <fileID> 5085 emsg(_(e_write)); 5086 goto theend; 5087 } 5088 putc(VIMSUGVERSION, fd); // <versionnr> 5089 5090 // Write si_sugtime to the file. 5091 put_time(fd, spin->si_sugtime); // <timestamp> 5092 5093 // <SUGWORDTREE> 5094 spin->si_memtot = 0; 5095 wordnode_T *tree = spin->si_foldroot->wn_sibling; 5096 5097 // Clear the index and wnode fields in the tree. 5098 clear_node(tree); 5099 5100 // Count the number of nodes. Needed to be able to allocate the 5101 // memory when reading the nodes. Also fills in index for shared 5102 // nodes. 
5103 size_t nodecount = (size_t)put_node(NULL, tree, 0, 0, false); 5104 5105 // number of nodes in 4 bytes 5106 put_bytes(fd, nodecount, 4); // <nodecount> 5107 assert(nodecount + nodecount * sizeof(int) < INT_MAX); 5108 spin->si_memtot += (int)(nodecount + nodecount * sizeof(int)); 5109 5110 // Write the nodes. 5111 put_node(fd, tree, 0, 0, false); 5112 5113 // <SUGTABLE>: <sugwcount> <sugline> ... 5114 linenr_T wcount = spin->si_spellbuf->b_ml.ml_line_count; 5115 assert(wcount >= 0); 5116 put_bytes(fd, (uintmax_t)wcount, 4); // <sugwcount> 5117 5118 for (linenr_T lnum = 1; lnum <= wcount; lnum++) { 5119 // <sugline>: <sugnr> ... NUL 5120 char *line = ml_get_buf(spin->si_spellbuf, lnum); 5121 int len = ml_get_buf_len(spin->si_spellbuf, lnum) + 1; 5122 if (fwrite(line, (size_t)len, 1, fd) == 0) { 5123 emsg(_(e_write)); 5124 goto theend; 5125 } 5126 spin->si_memtot += len; 5127 } 5128 5129 // Write another byte to check for errors. 5130 if (putc(0, fd) == EOF) { 5131 emsg(_(e_write)); 5132 } 5133 5134 vim_snprintf(IObuff, IOSIZE, 5135 _("Estimated runtime memory use: %d bytes"), spin->si_memtot); 5136 spell_message(spin, IObuff); 5137 5138 theend: 5139 // close the file 5140 fclose(fd); 5141 } 5142 5143 /// Create a Vim spell file from one or more word lists. 5144 /// "fnames[0]" is the output file name. 5145 /// "fnames[fcount - 1]" is the last input file name. 5146 /// Exception: when "fnames[0]" ends in ".add" it's used as the input file name 5147 /// and ".spl" is appended to make the output file name. 
5148 /// 5149 /// @param ascii -ascii argument given 5150 /// @param over_write overwrite existing output file 5151 /// @param added_word invoked through "zg" 5152 static void mkspell(int fcount, char **fnames, bool ascii, bool over_write, bool added_word) 5153 { 5154 char *fname = NULL; 5155 afffile_T *(afile[MAXREGIONS]); 5156 bool error = false; 5157 spellinfo_T spin; 5158 5159 CLEAR_FIELD(spin); 5160 spin.si_verbose = !added_word; 5161 spin.si_ascii = ascii; 5162 spin.si_followup = true; 5163 spin.si_rem_accents = true; 5164 ga_init(&spin.si_rep, (int)sizeof(fromto_T), 20); 5165 ga_init(&spin.si_repsal, (int)sizeof(fromto_T), 20); 5166 ga_init(&spin.si_sal, (int)sizeof(fromto_T), 20); 5167 ga_init(&spin.si_map, (int)sizeof(char), 100); 5168 ga_init(&spin.si_comppat, (int)sizeof(char *), 20); 5169 ga_init(&spin.si_prefcond, (int)sizeof(char *), 50); 5170 hash_init(&spin.si_commonwords); 5171 spin.si_newcompID = 127; // start compound ID at first maximum 5172 5173 // default: fnames[0] is output file, following are input files 5174 // When "fcount" is 1 there is only one file. 5175 char **innames = &fnames[fcount == 1 ? 0 : 1]; 5176 int incount = fcount - 1; 5177 5178 char *wfname = xmalloc(MAXPATHL); 5179 5180 if (fcount >= 1) { 5181 int len = (int)strlen(fnames[0]); 5182 if (fcount == 1 && len > 4 && strcmp(fnames[0] + len - 4, ".add") == 0) { 5183 // For ":mkspell path/en.latin1.add" output file is 5184 // "path/en.latin1.add.spl". 5185 incount = 1; 5186 vim_snprintf(wfname, MAXPATHL, "%s.spl", fnames[0]); 5187 } else if (fcount == 1) { 5188 // For ":mkspell path/vim" output file is "path/vim.latin1.spl". 5189 incount = 1; 5190 vim_snprintf(wfname, MAXPATHL, SPL_FNAME_TMPL, 5191 fnames[0], spin.si_ascii ? "ascii" : spell_enc()); 5192 } else if (len > 4 && strcmp(fnames[0] + len - 4, ".spl") == 0) { 5193 // Name ends in ".spl", use as the file name. 
5194 xstrlcpy(wfname, fnames[0], MAXPATHL); 5195 } else { 5196 // Name should be language, make the file name from it. 5197 vim_snprintf(wfname, MAXPATHL, SPL_FNAME_TMPL, 5198 fnames[0], spin.si_ascii ? "ascii" : spell_enc()); 5199 } 5200 5201 // Check for .ascii.spl. 5202 if (strstr(path_tail(wfname), SPL_FNAME_ASCII) != NULL) { 5203 spin.si_ascii = true; 5204 } 5205 5206 // Check for .add.spl. 5207 if (strstr(path_tail(wfname), SPL_FNAME_ADD) != NULL) { 5208 spin.si_add = true; 5209 } 5210 } 5211 5212 if (incount <= 0) { 5213 emsg(_(e_invarg)); // need at least output and input names 5214 } else if (vim_strchr(path_tail(wfname), '_') != NULL) { 5215 emsg(_("E751: Output file name must not have region name")); 5216 } else if (incount > MAXREGIONS) { 5217 semsg(_("E754: Only up to %d regions supported"), MAXREGIONS); 5218 } else { 5219 // Check for overwriting before doing things that may take a lot of 5220 // time. 5221 if (!over_write && os_path_exists(wfname)) { 5222 emsg(_(e_exists)); 5223 goto theend; 5224 } 5225 if (os_isdir(wfname)) { 5226 semsg(_(e_isadir2), wfname); 5227 goto theend; 5228 } 5229 5230 fname = xmalloc(MAXPATHL); 5231 5232 // Init the aff and dic pointers. 5233 // Get the region names if there are more than 2 arguments. 
5234 for (int i = 0; i < incount; i++) { 5235 afile[i] = NULL; 5236 5237 if (incount > 1) { 5238 int len = (int)strlen(innames[i]); 5239 if (strlen(path_tail(innames[i])) < 5 5240 || innames[i][len - 3] != '_') { 5241 semsg(_("E755: Invalid region in %s"), innames[i]); 5242 goto theend; 5243 } 5244 spin.si_region_name[i * 2] = (char)(uint8_t)TOLOWER_ASC(innames[i][len - 2]); 5245 spin.si_region_name[i * 2 + 1] = (char)(uint8_t)TOLOWER_ASC(innames[i][len - 1]); 5246 } 5247 } 5248 spin.si_region_count = incount; 5249 5250 spin.si_foldroot = wordtree_alloc(&spin); 5251 spin.si_keeproot = wordtree_alloc(&spin); 5252 spin.si_prefroot = wordtree_alloc(&spin); 5253 5254 // When not producing a .add.spl file clear the character table when 5255 // we encounter one in the .aff file. This means we dump the current 5256 // one in the .spl file if the .aff file doesn't define one. That's 5257 // better than guessing the contents, the table will match a 5258 // previously loaded spell file. 5259 if (!spin.si_add) { 5260 spin.si_clear_chartab = true; 5261 } 5262 5263 // Read all the .aff and .dic files. 5264 // Text is converted to 'encoding'. 5265 // Words are stored in the case-folded and keep-case trees. 5266 for (int i = 0; i < incount && !error; i++) { 5267 spin.si_conv.vc_type = CONV_NONE; 5268 spin.si_region = 1 << i; 5269 5270 vim_snprintf(fname, MAXPATHL, "%s.aff", innames[i]); 5271 if (os_path_exists(fname)) { 5272 // Read the .aff file. Will init "spin->si_conv" based on the 5273 // "SET" line. 5274 afile[i] = spell_read_aff(&spin, fname); 5275 if (afile[i] == NULL) { 5276 error = true; 5277 } else { 5278 // Read the .dic file and store the words in the trees. 5279 vim_snprintf(fname, MAXPATHL, "%s.dic", innames[i]); 5280 if (spell_read_dic(&spin, fname, afile[i]) == FAIL) { 5281 error = true; 5282 } 5283 } 5284 } else { 5285 // No .aff file, try reading the file as a word list. Store 5286 // the words in the trees. 
5287 if (spell_read_wordfile(&spin, innames[i]) == FAIL) { 5288 error = true; 5289 } 5290 } 5291 5292 // Free any conversion stuff. 5293 convert_setup(&spin.si_conv, NULL, NULL); 5294 } 5295 5296 if (spin.si_compflags != NULL && spin.si_nobreak) { 5297 msg(_("Warning: both compounding and NOBREAK specified"), 0); 5298 } 5299 5300 if (!error && !got_int) { 5301 // Combine tails in the tree. 5302 spell_message(&spin, _(msg_compressing)); 5303 wordtree_compress(&spin, spin.si_foldroot, "case-folded"); 5304 wordtree_compress(&spin, spin.si_keeproot, "keep-case"); 5305 wordtree_compress(&spin, spin.si_prefroot, "prefixes"); 5306 } 5307 5308 if (!error && !got_int) { 5309 // Write the info in the spell file. 5310 vim_snprintf(IObuff, IOSIZE, 5311 _("Writing spell file %s..."), wfname); 5312 spell_message(&spin, IObuff); 5313 5314 error = write_vim_spell(&spin, wfname) == FAIL; 5315 5316 spell_message(&spin, _("Done!")); 5317 vim_snprintf(IObuff, IOSIZE, 5318 _("Estimated runtime memory use: %d bytes"), spin.si_memtot); 5319 spell_message(&spin, IObuff); 5320 5321 // If the file is loaded need to reload it. 5322 if (!error) { 5323 spell_reload_one(wfname, added_word); 5324 } 5325 } 5326 5327 // Free the allocated memory. 5328 ga_clear(&spin.si_rep); 5329 ga_clear(&spin.si_repsal); 5330 ga_clear(&spin.si_sal); 5331 ga_clear(&spin.si_map); 5332 ga_clear(&spin.si_comppat); 5333 ga_clear(&spin.si_prefcond); 5334 hash_clear_all(&spin.si_commonwords, 0); 5335 5336 // Free the .aff file structures. 5337 for (int i = 0; i < incount; i++) { 5338 if (afile[i] != NULL) { 5339 spell_free_aff(afile[i]); 5340 } 5341 } 5342 5343 // Free all the bits and pieces at once. 5344 free_blocks(spin.si_blocks); 5345 5346 // If there is soundfolding info and no NOSUGFILE item create the 5347 // .sug file with the soundfolded word trie. 
5348 if (spin.si_sugtime != 0 && !error && !got_int) { 5349 spell_make_sugfile(&spin, wfname); 5350 } 5351 } 5352 5353 theend: 5354 xfree(fname); 5355 xfree(wfname); 5356 } 5357 5358 // Display a message for spell file processing when 'verbose' is set or using 5359 // ":mkspell". "str" can be IObuff. 5360 static void spell_message(const spellinfo_T *spin, char *str) 5361 FUNC_ATTR_NONNULL_ALL 5362 { 5363 if (spin->si_verbose || p_verbose > 2) { 5364 if (!spin->si_verbose) { 5365 verbose_enter(); 5366 } 5367 msg(str, 0); 5368 ui_flush(); 5369 if (!spin->si_verbose) { 5370 verbose_leave(); 5371 } 5372 } 5373 } 5374 5375 // ":[count]spellgood {word}" 5376 // ":[count]spellwrong {word}" 5377 // ":[count]spellundo {word}" 5378 // ":[count]spellrare {word}" 5379 void ex_spell(exarg_T *eap) 5380 { 5381 spell_add_word(eap->arg, (int)strlen(eap->arg), 5382 eap->cmdidx == CMD_spellwrong 5383 ? SPELL_ADD_BAD 5384 : eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD, 5385 eap->forceit ? 0 : (int)eap->line2, 5386 eap->cmdidx == CMD_spellundo); 5387 } 5388 5389 /// Add "word[len]" to 'spellfile' as a good or bad word. 5390 /// 5391 /// @param what SPELL_ADD_ values 5392 /// @param idx "zG" and "zW": zero, otherwise index in 'spellfile' 5393 /// @param bool // true for "zug", "zuG", "zuw" and "zuW" 5394 void spell_add_word(char *word, int len, SpellAddType what, int idx, bool undo) 5395 { 5396 FILE *fd = NULL; 5397 buf_T *buf = NULL; 5398 bool new_spf = false; 5399 char *fname; 5400 char *fnamebuf = NULL; 5401 char line[MAXWLEN * 2]; 5402 char *spf; 5403 5404 if (!valid_spell_word(word, word + len)) { 5405 emsg(_(e_illegal_character_in_word)); 5406 return; 5407 } 5408 5409 if (idx == 0) { // use internal wordlist 5410 if (int_wordlist == NULL) { 5411 int_wordlist = vim_tempname(); 5412 if (int_wordlist == NULL) { 5413 return; 5414 } 5415 } 5416 fname = int_wordlist; 5417 } else { 5418 int i; 5419 // If 'spellfile' isn't set figure out a good default value. 
5420 if (*curwin->w_s->b_p_spf == NUL) { 5421 init_spellfile(); 5422 new_spf = true; 5423 } 5424 5425 if (*curwin->w_s->b_p_spf == NUL) { 5426 semsg(_(e_notset), "spellfile"); 5427 return; 5428 } 5429 fnamebuf = xmalloc(MAXPATHL); 5430 5431 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; i++) { 5432 copy_option_part(&spf, fnamebuf, MAXPATHL, ","); 5433 if (i == idx) { 5434 break; 5435 } 5436 if (*spf == NUL) { 5437 semsg(_("E765: 'spellfile' does not have %d entries"), idx); 5438 xfree(fnamebuf); 5439 return; 5440 } 5441 } 5442 5443 // Check that the user isn't editing the .add file somewhere. 5444 buf = buflist_findname_exp(fnamebuf); 5445 if (buf != NULL && buf->b_ml.ml_mfp == NULL) { 5446 buf = NULL; 5447 } 5448 if (buf != NULL && bufIsChanged(buf)) { 5449 emsg(_(e_bufloaded)); 5450 xfree(fnamebuf); 5451 return; 5452 } 5453 5454 fname = fnamebuf; 5455 } 5456 5457 if (what == SPELL_ADD_BAD || undo) { 5458 int fpos_next = 0; 5459 int fpos = 0; 5460 // When the word appears as good word we need to remove that one, 5461 // since its flags sort before the one with WF_BANNED. 5462 fd = os_fopen(fname, "r"); 5463 if (fd != NULL) { 5464 while (!vim_fgets(line, MAXWLEN * 2, fd)) { 5465 fpos = fpos_next; 5466 fpos_next = (int)ftell(fd); 5467 if (fpos_next < 0) { 5468 break; // should never happen 5469 } 5470 if (strncmp(word, line, (size_t)len) == 0 5471 && (line[len] == '/' || (uint8_t)line[len] < ' ')) { 5472 // Found duplicate word. Remove it by writing a '#' at 5473 // the start of the line. Mixing reading and writing 5474 // doesn't work for all systems, close the file first. 
5475 fclose(fd); 5476 fd = os_fopen(fname, "r+"); 5477 if (fd == NULL) { 5478 break; 5479 } 5480 if (fseek(fd, fpos, SEEK_SET) == 0) { 5481 fputc('#', fd); 5482 if (undo) { 5483 home_replace(NULL, fname, NameBuff, MAXPATHL, true); 5484 smsg(0, _("Word '%.*s' removed from %s"), len, word, NameBuff); 5485 } 5486 } 5487 if (fseek(fd, fpos_next, SEEK_SET) != 0) { 5488 PERROR(_("Seek error in spellfile")); 5489 break; 5490 } 5491 } 5492 } 5493 if (fd != NULL) { 5494 fclose(fd); 5495 } 5496 } 5497 } 5498 5499 if (!undo) { 5500 fd = os_fopen(fname, "a"); 5501 if (fd == NULL && new_spf) { 5502 char *p; 5503 5504 // We just initialized the 'spellfile' option and can't open the 5505 // file. We may need to create the "spell" directory first. We 5506 // already checked the runtime directory is writable in 5507 // init_spellfile(). 5508 if (!dir_of_file_exists(fname) 5509 && (p = path_tail_with_sep(fname)) != fname) { 5510 char c = *p; 5511 5512 // The directory doesn't exist. Try creating it and opening 5513 // the file again. 5514 *p = NUL; 5515 os_mkdir(fname, 0755); 5516 *p = c; 5517 fd = os_fopen(fname, "a"); 5518 } 5519 } 5520 5521 if (fd == NULL) { 5522 semsg(_(e_notopen), fname); 5523 } else { 5524 if (what == SPELL_ADD_BAD) { 5525 fprintf(fd, "%.*s/!\n", len, word); 5526 } else if (what == SPELL_ADD_RARE) { 5527 fprintf(fd, "%.*s/?\n", len, word); 5528 } else { 5529 fprintf(fd, "%.*s\n", len, word); 5530 } 5531 fclose(fd); 5532 5533 home_replace(NULL, fname, NameBuff, MAXPATHL, true); 5534 smsg(0, _("Word '%.*s' added to %s"), len, word, NameBuff); 5535 } 5536 } 5537 5538 if (fd != NULL) { 5539 // Update the .add.spl file. 5540 mkspell(1, &fname, false, true, true); 5541 5542 // If the .add file is edited somewhere, reload it. 5543 if (buf != NULL) { 5544 buf_reload(buf, buf->b_orig_mode, false); 5545 } 5546 5547 redraw_all_later(UPD_SOME_VALID); 5548 } 5549 xfree(fnamebuf); 5550 } 5551 5552 // Initialize 'spellfile' for the current buffer. 
5553 // 5554 // If the location does not exist, create it. Defaults to 5555 // stdpath("data") + "/site/spell/{spelllang}.{encoding}.add". 5556 static void init_spellfile(void) 5557 { 5558 char *lend; 5559 bool aspath = false; 5560 char *lstart = curbuf->b_s.b_p_spl; 5561 5562 if (*curwin->w_s->b_p_spl == NUL || GA_EMPTY(&curwin->w_s->b_langp)) { 5563 return; 5564 } 5565 5566 // Find the end of the language name. Exclude the region. If there 5567 // is a path separator remember the start of the tail. 5568 for (lend = curwin->w_s->b_p_spl; *lend != NUL 5569 && vim_strchr(",._", (uint8_t)(*lend)) == NULL; lend++) { 5570 if (vim_ispathsep(*lend)) { 5571 aspath = true; 5572 lstart = lend + 1; 5573 } 5574 } 5575 5576 char *buf = xmalloc(MAXPATHL); 5577 size_t buf_len = MAXPATHL; 5578 5579 if (!aspath) { 5580 char *xdg_path = get_xdg_home(kXDGDataHome); 5581 xstrlcpy(buf, xdg_path, buf_len); 5582 xfree(xdg_path); 5583 5584 xstrlcat(buf, "/site/spell", buf_len); 5585 5586 char *failed_dir; 5587 if (os_mkdir_recurse(buf, 0755, &failed_dir, NULL) != 0) { 5588 xfree(buf); 5589 xfree(failed_dir); 5590 return; 5591 } 5592 } else { 5593 if ((size_t)(lend - curbuf->b_s.b_p_spl) >= buf_len) { 5594 xfree(buf); 5595 return; 5596 } 5597 xmemcpyz(buf, curbuf->b_s.b_p_spl, (size_t)(lend - curbuf->b_s.b_p_spl)); 5598 } 5599 5600 // Append spelllang 5601 vim_snprintf(buf + strlen(buf), buf_len - strlen(buf), "/%.*s", (int)(lend - lstart), lstart); 5602 5603 // Append ".ascii.add" or ".{enc}.add" 5604 char *fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)->lp_slang->sl_fname; 5605 const char *enc_suffix = 5606 (fname != NULL && strstr(path_tail(fname), ".ascii.") != NULL) ? "ascii" : spell_enc(); 5607 vim_snprintf(buf + strlen(buf), buf_len - strlen(buf), ".%s.add", enc_suffix); 5608 5609 set_option_value_give_err(kOptSpellfile, CSTR_AS_OPTVAL(buf), OPT_LOCAL); 5610 xfree(buf); 5611 } 5612 5613 /// Set the spell character tables from strings in the .spl file. 
5614 /// 5615 /// @param cnt length of "flags" 5616 static void set_spell_charflags(const char *flags_in, int cnt, const char *fol) 5617 { 5618 const uint8_t *flags = (uint8_t *)flags_in; 5619 // We build the new tables here first, so that we can compare with the 5620 // previous one. 5621 spelltab_T new_st; 5622 const char *p = fol; 5623 5624 clear_spell_chartab(&new_st); 5625 5626 for (int i = 0; i < 128; i++) { 5627 if (i < cnt) { 5628 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; 5629 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; 5630 } 5631 5632 if (*p != NUL) { 5633 int c = mb_ptr2char_adv(&p); 5634 new_st.st_fold[i + 128] = (uint8_t)c; 5635 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) { 5636 new_st.st_upper[c] = (uint8_t)(i + 128); 5637 } 5638 } 5639 } 5640 5641 set_spell_finish(&new_st); 5642 } 5643 5644 static int set_spell_finish(spelltab_T *new_st) 5645 { 5646 if (did_set_spelltab) { 5647 // check that it's the same table 5648 for (int i = 0; i < 256; i++) { 5649 if (spelltab.st_isw[i] != new_st->st_isw[i] 5650 || spelltab.st_isu[i] != new_st->st_isu[i] 5651 || spelltab.st_fold[i] != new_st->st_fold[i] 5652 || spelltab.st_upper[i] != new_st->st_upper[i]) { 5653 emsg(_("E763: Word characters differ between spell files")); 5654 return FAIL; 5655 } 5656 } 5657 } else { 5658 // copy the new spelltab into the one being used 5659 spelltab = *new_st; 5660 did_set_spelltab = true; 5661 } 5662 5663 return OK; 5664 } 5665 5666 // Write the table with prefix conditions to the .spl file. 5667 // When "fd" is NULL only count the length of what is written. 
5668 static int write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv) 5669 { 5670 assert(gap->ga_len >= 0); 5671 5672 if (fd != NULL) { 5673 put_bytes(fd, (uintmax_t)gap->ga_len, 2); // <prefcondcnt> 5674 } 5675 size_t totlen = 2 + (size_t)gap->ga_len; // <prefcondcnt> and <condlen> bytes 5676 for (int i = 0; i < gap->ga_len; i++) { 5677 // <prefcond> : <condlen> <condstr> 5678 char *p = ((char **)gap->ga_data)[i]; 5679 if (p != NULL) { 5680 size_t len = strlen(p); 5681 if (fd != NULL) { 5682 assert(len <= INT_MAX); 5683 fputc((int)len, fd); 5684 *fwv &= fwrite(p, len, 1, fd); 5685 } 5686 totlen += len; 5687 } else if (fd != NULL) { 5688 fputc(0, fd); 5689 } 5690 } 5691 5692 assert(totlen <= INT_MAX); 5693 return (int)totlen; 5694 } 5695 5696 // Use map string "map" for languages "lp". 5697 static void set_map_str(slang_T *lp, const char *map) 5698 { 5699 int headc = 0; 5700 5701 if (*map == NUL) { 5702 lp->sl_has_map = false; 5703 return; 5704 } 5705 lp->sl_has_map = true; 5706 5707 // Init the array and hash tables empty. 5708 for (int i = 0; i < 256; i++) { 5709 lp->sl_map_array[i] = 0; 5710 } 5711 hash_init(&lp->sl_map_hash); 5712 5713 // The similar characters are stored separated with slashes: 5714 // "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and 5715 // before the same slash. For characters above 255 sl_map_hash is used. 5716 for (const char *p = map; *p != NUL;) { 5717 int c = mb_cptr2char_adv(&p); 5718 if (c == '/') { 5719 headc = 0; 5720 } else { 5721 if (headc == 0) { 5722 headc = c; 5723 } 5724 5725 // Characters above 255 don't fit in sl_map_array[], put them in 5726 // the hash table. Each entry is the char, a NUL the headchar and 5727 // a NUL. 
5728 if (c >= 256) { 5729 int cl = utf_char2len(c); 5730 int headcl = utf_char2len(headc); 5731 hash_T hash; 5732 hashitem_T *hi; 5733 5734 char *b = xmalloc((size_t)(cl + headcl) + 2); 5735 utf_char2bytes(c, b); 5736 b[cl] = NUL; 5737 utf_char2bytes(headc, b + cl + 1); 5738 b[cl + 1 + headcl] = NUL; 5739 hash = hash_hash(b); 5740 hi = hash_lookup(&lp->sl_map_hash, b, strlen(b), hash); 5741 if (HASHITEM_EMPTY(hi)) { 5742 hash_add_item(&lp->sl_map_hash, hi, b, hash); 5743 } else { 5744 // This should have been checked when generating the .spl 5745 // file. 5746 emsg(_(e_duplicate_char_in_map_entry)); 5747 xfree(b); 5748 } 5749 } else { 5750 lp->sl_map_array[c] = headc; 5751 } 5752 } 5753 } 5754 }