spell_defs.h (9493B)
1 #pragma once 2 3 #include <stdbool.h> 4 #include <stdint.h> 5 6 #include "nvim/buffer_defs.h" 7 8 enum { 9 /// Assume max. word len is this many bytes. 10 /// Some places assume a word length fits in a byte, thus it can't be above 255. 11 MAXWLEN = 254, 12 }; 13 14 enum { MAXREGIONS = 8, }; ///< Number of regions supported. 15 16 /// Type used for indexes in the word tree need to be at least 4 bytes. If int 17 /// is 8 bytes we could use something smaller, but what? 18 typedef int idx_T; 19 20 #define SPL_FNAME_TMPL "%s.%s.spl" 21 #define SPL_FNAME_ADD ".add." 22 #define SPL_FNAME_ASCII ".ascii." 23 24 /// Flags used for a word. Only the lowest byte can be used, the region byte 25 /// comes above it. 26 enum { 27 WF_REGION = 0x01, ///< region byte follows 28 WF_ONECAP = 0x02, ///< word with one capital (or all capitals) 29 WF_ALLCAP = 0x04, ///< word must be all capitals 30 WF_RARE = 0x08, ///< rare word 31 WF_BANNED = 0x10, ///< bad word 32 WF_AFX = 0x20, ///< affix ID follows 33 WF_FIXCAP = 0x40, ///< keep-case word, allcap not allowed 34 WF_KEEPCAP = 0x80, ///< keep-case word 35 WF_CAPMASK = (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP), 36 }; 37 38 /// for <flags2>, shifted up one byte to be used in wn_flags 39 enum { 40 WF_HAS_AFF = 0x0100, ///< word includes affix 41 WF_NEEDCOMP = 0x0200, ///< word only valid in compound 42 WF_NOSUGGEST = 0x0400, ///< word not to be suggested 43 WF_COMPROOT = 0x0800, ///< already compounded word, COMPOUNDROOT 44 WF_NOCOMPBEF = 0x1000, ///< no compounding before this word 45 WF_NOCOMPAFT = 0x2000, ///< no compounding after this word 46 }; 47 48 /// flags for <pflags> 49 enum { 50 WFP_RARE = 0x01, ///< rare prefix 51 WFP_NC = 0x02, ///< prefix is not combining 52 WFP_UP = 0x04, ///< to-upper prefix 53 WFP_COMPPERMIT = 0x08, ///< prefix with COMPOUNDPERMITFLAG 54 WFP_COMPFORBID = 0x10, ///< prefix with COMPOUNDFORBIDFLAG 55 }; 56 57 /// Flags for postponed prefixes in "sl_pidxs". 
Must be above affixID (one 58 /// byte) and prefcondnr (two bytes). 59 enum { 60 WF_RAREPFX = WFP_RARE << 24, ///< rare postponed prefix 61 WF_PFX_NC = WFP_NC << 24, ///< non-combining postponed prefix 62 WF_PFX_UP = WFP_UP << 24, ///< to-upper postponed prefix 63 WF_PFX_COMPPERMIT = WFP_COMPPERMIT << 24, ///< postponed prefix with COMPOUNDPERMITFLAG 64 WF_PFX_COMPFORBID = WFP_COMPFORBID << 24, ///< postponed prefix with COMPOUNDFORBIDFLAG 65 }; 66 67 /// flags for <compoptions> 68 enum { 69 COMP_CHECKDUP = 1, ///< CHECKCOMPOUNDDUP 70 COMP_CHECKREP = 2, ///< CHECKCOMPOUNDREP 71 COMP_CHECKCASE = 4, ///< CHECKCOMPOUNDCASE 72 COMP_CHECKTRIPLE = 8, ///< CHECKCOMPOUNDTRIPLE 73 }; 74 75 /// Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, 76 /// si_repsal, sl_rep, and si_sal. Not for sl_sal! 77 /// One replacement: from "ft_from" to "ft_to". 78 typedef struct { 79 char *ft_from; 80 char *ft_to; 81 } fromto_T; 82 83 /// Info from "SAL" entries in ".aff" file used in sl_sal. 84 /// The info is split for quick processing by spell_soundfold(). 85 /// Note that "sm_oneof" and "sm_rules" point into sm_lead. 86 typedef struct { 87 char *sm_lead; ///< leading letters 88 int sm_leadlen; ///< length of "sm_lead" 89 char *sm_oneof; ///< letters from () or NULL 90 char *sm_rules; ///< rules like ^, $, priority 91 char *sm_to; ///< replacement. 92 int *sm_lead_w; ///< wide character copy of "sm_lead" 93 int *sm_oneof_w; ///< wide character copy of "sm_oneof" 94 int *sm_to_w; ///< wide character copy of "sm_to" 95 } salitem_T; 96 97 typedef int salfirst_T; 98 99 /// Values for SP_*ERROR are negative, positive values are used by 100 /// read_cnt_string(). 101 enum { 102 SP_TRUNCERROR = -1, ///< spell file truncated error 103 SP_FORMERROR = -2, ///< format error in spell file 104 SP_OTHERERROR = -3, ///< other error while reading spell file 105 }; 106 107 /// Structure used to store words and other info for one language, loaded from 108 /// a .spl file. 
/// The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
/// case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
///
/// The "byts" array stores the possible bytes in each tree node, preceded by
/// the number of possible bytes, sorted on byte value:
///      <len> <byte1> <byte2> ...
/// The "idxs" array stores the index of the child node corresponding to the
/// byte in "byts".
/// Exception: when the byte is zero, the word may end here and "idxs" holds
/// the flags, region mask and affixID for the word.  There may be several
/// zeros in sequence for alternative flag/region/affixID combinations.
typedef struct slang_S slang_T;

struct slang_S {
  slang_T *sl_next;         ///< next language
  char *sl_name;            ///< language name "en", "en.rare", "nl", etc.
  char *sl_fname;           ///< name of .spl file
  bool sl_add;              ///< true if it's a .add file.

  uint8_t *sl_fbyts;        ///< case-folded word bytes
  int sl_fbyts_len;         ///< length of sl_fbyts
  idx_T *sl_fidxs;          ///< case-folded word indexes
  uint8_t *sl_kbyts;        ///< keep-case word bytes
  idx_T *sl_kidxs;          ///< keep-case word indexes
  uint8_t *sl_pbyts;        ///< prefix tree word bytes
  idx_T *sl_pidxs;          ///< prefix tree word indexes

  char *sl_info;            ///< infotext string or NULL

  /// table with up to 8 region names plus NUL
  char sl_regions[MAXREGIONS * 2 + 1];

  char *sl_midword;         ///< MIDWORD string or NULL

  hashtab_T sl_wordcount;   ///< hashtable with word count, wordcount_T

  int sl_compmax;           ///< COMPOUNDWORDMAX (default: MAXWLEN)
  int sl_compminlen;        ///< COMPOUNDMIN (default: 0)
  int sl_compsylmax;        ///< COMPOUNDSYLMAX (default: MAXWLEN)
  int sl_compoptions;       ///< COMP_* flags
  garray_T sl_comppat;      ///< CHECKCOMPOUNDPATTERN items
  regprog_T *sl_compprog;   ///< COMPOUNDRULE turned into a regexp program
                            ///< (NULL when no compounding)
  uint8_t *sl_comprules;    ///< all COMPOUNDRULE concatenated (or NULL)
  uint8_t *sl_compstartflags;  ///< flags for first compound word
  uint8_t *sl_compallflags;  ///< all flags for compound words
  bool sl_nobreak;          ///< When true: no spaces between words
  char *sl_syllable;        ///< SYLLABLE repeatable chars or NULL
  garray_T sl_syl_items;    ///< syllable items

  int sl_prefixcnt;         ///< number of items in "sl_prefprog"
  regprog_T **sl_prefprog;  ///< table with regprogs for prefixes

  garray_T sl_rep;          ///< list of fromto_T entries from REP lines
  int16_t sl_rep_first[256];  ///< indexes where byte first appears, -1 if there is none
  garray_T sl_sal;          ///< list of salitem_T entries from SAL lines
  salfirst_T sl_sal_first[256];  ///< indexes where byte first appears, -1 if there is none
  bool sl_followup;         ///< SAL followup
  bool sl_collapse;         ///< SAL collapse_result
  bool sl_rem_accents;      ///< SAL remove_accents
  bool sl_sofo;             ///< SOFOFROM and SOFOTO instead of SAL items:
                            ///< "sl_sal_first" maps chars
                            ///< "sl_sal" is a list of wide char lists.
  garray_T sl_repsal;       ///< list of fromto_T entries from REPSAL lines
  int16_t sl_repsal_first[256];  ///< sl_rep_first for REPSAL lines
  bool sl_nosplitsugs;      ///< don't suggest splitting a word
  bool sl_nocompoundsugs;   ///< don't suggest compounding

  // Info from the .sug file.  Loaded on demand.
  time_t sl_sugtime;        ///< timestamp for .sug file
  uint8_t *sl_sbyts;        ///< soundfolded word bytes
  idx_T *sl_sidxs;          ///< soundfolded word indexes
  buf_T *sl_sugbuf;         ///< buffer with word number table
  bool sl_sugloaded;        ///< true when .sug file was loaded or failed to load

  bool sl_has_map;          ///< true, if there is a MAP line
  hashtab_T sl_map_hash;    ///< MAP for multi-byte chars
  int sl_map_array[256];    ///< MAP for first 256 chars
  hashtab_T sl_sounddone;   ///< table with soundfolded words that have
                            ///< been handled, see add_sound_suggest()
};

/// Structure used in "b_langp", filled from 'spelllang'.
typedef struct {
  slang_T *lp_slang;    ///< info for this language
  slang_T *lp_sallang;  ///< language used for sound folding or NULL
  slang_T *lp_replang;  ///< language used for REP items or NULL
  int lp_region;        ///< bitmask for region or REGION_ALL
} langp_T;

/// Pointer to the "i"-th langp_T in the array stored at "(ga).ga_data".
#define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))

#define VIMSUGMAGIC "VIMsug"    // string at start of Vim .sug file
#define VIMSUGMAGICL 6          // number of characters in VIMSUGMAGIC
#define VIMSUGVERSION 1

enum { REGION_ALL = 0xff, };  ///< word valid in all regions

/// The tables used for recognizing word characters according to spelling.
/// These are only used for the first 256 characters of 'encoding'.
typedef struct {
  bool st_isw[256];       ///< per byte value: true when it is a word char
  bool st_isu[256];       ///< per byte value: true when it is an uppercase char
  uint8_t st_fold[256];   ///< per byte value: the case-folded char
  uint8_t st_upper[256];  ///< per byte value: the upper-case char
} spelltab_T;

/// Values for the "what" argument of spell_add_word().
typedef enum {
  SPELL_ADD_GOOD = 0,  ///< value 0
  SPELL_ADD_BAD,       ///< value 1
  SPELL_ADD_RARE,      ///< value 2
} SpellAddType;

/// A counter followed directly by the word it belongs to; "wc_word" is a
/// flexible array member, so the word is stored inline after the count.
typedef struct {
  uint16_t wc_count;  ///< number of times the word was seen
  char wc_word[];     ///< the word itself
} wordcount_T;

/// Byte offset of "wc_word" inside wordcount_T.
#define WC_KEY_OFF offsetof(wordcount_T, wc_word)
/// Step back from "(hi)->hi_key" by WC_KEY_OFF to get the enclosing
/// wordcount_T.  Assumes "hi_key" points at the "wc_word" member of a
/// wordcount_T.
#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))

/// Same value as 0xffff: the largest number a uint16_t (the type of
/// "wc_count") can hold.
enum { MAXWORDCOUNT = UINT16_MAX, };