neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

spell_defs.h (9493B)


      1 #pragma once
      2 
      3 #include <stdbool.h>
      4 #include <stdint.h>
      5 
      6 #include "nvim/buffer_defs.h"
      7 
      8 enum {
      9  /// Assumed maximum length of a word, in bytes.
     10  /// Some places assume a word length fits in a byte, thus it can't be above 255.
     11  MAXWLEN = 254,
     12 };
     13 
     14 enum { MAXREGIONS = 8, };  ///< Maximum number of regions supported.
     15 
     16 /// Type used for indexes in the word tree; it needs to be at least 4 bytes.  If
     17 /// int is 8 bytes we could use something smaller, but what?
     18 typedef int idx_T;
     19 
     20 #define SPL_FNAME_TMPL  "%s.%s.spl"  // printf-style template: two "%s" fields, then ".spl"
     21 #define SPL_FNAME_ADD   ".add."      // NOTE(review): presumably marks a .add file name; confirm at use sites
     22 #define SPL_FNAME_ASCII ".ascii."    // NOTE(review): presumably marks an ASCII spell file name; confirm at use sites
     23 
     24 /// Flags used for a word.  Only the lowest byte can be used, because the region
     25 /// byte comes directly above it.
     26 enum {
     27  WF_REGION  = 0x01,  ///< region byte follows
     28  WF_ONECAP  = 0x02,  ///< word with one capital (or all capitals)
     29  WF_ALLCAP  = 0x04,  ///< word must be all capitals
     30  WF_RARE    = 0x08,  ///< rare word
     31  WF_BANNED  = 0x10,  ///< bad word
     32  WF_AFX     = 0x20,  ///< affix ID follows
     33  WF_FIXCAP  = 0x40,  ///< keep-case word, allcap not allowed
     34  WF_KEEPCAP = 0x80,  ///< keep-case word
     35  WF_CAPMASK = (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP),  ///< all four capitalization flags combined
     36 };
     37 
     38 /// Flags for <flags2>, shifted up one byte to be used in wn_flags.
     39 enum {
     40  WF_HAS_AFF   = 0x0100,  ///< word includes affix
     41  WF_NEEDCOMP  = 0x0200,  ///< word only valid in compound
     42  WF_NOSUGGEST = 0x0400,  ///< word not to be suggested
     43  WF_COMPROOT  = 0x0800,  ///< already compounded word, COMPOUNDROOT
     44  WF_NOCOMPBEF = 0x1000,  ///< no compounding before this word
     45  WF_NOCOMPAFT = 0x2000,  ///< no compounding after this word
     46 };
     47 
     48 /// Flags for <pflags>; each flag is a separate bit in the lowest byte.
     49 enum {
     50  WFP_RARE       = 0x01,  ///< rare prefix
     51  WFP_NC         = 0x02,  ///< prefix is not combining
     52  WFP_UP         = 0x04,  ///< to-upper prefix
     53  WFP_COMPPERMIT = 0x08,  ///< prefix with COMPOUNDPERMITFLAG
     54  WFP_COMPFORBID = 0x10,  ///< prefix with COMPOUNDFORBIDFLAG
     55 };
     56 
     57 /// Flags for postponed prefixes in "sl_pidxs": the WFP_* flags shifted up 24
     58 /// bits, so that they are above affixID (one byte) and prefcondnr (two bytes).
     59 enum {
     60  WF_RAREPFX        = WFP_RARE << 24,        ///< rare postponed prefix
     61  WF_PFX_NC         = WFP_NC << 24,          ///< non-combining postponed prefix
     62  WF_PFX_UP         = WFP_UP << 24,          ///< to-upper postponed prefix
     63  WF_PFX_COMPPERMIT = WFP_COMPPERMIT << 24,  ///< postponed prefix with COMPOUNDPERMITFLAG
     64  WF_PFX_COMPFORBID = WFP_COMPFORBID << 24,  ///< postponed prefix with COMPOUNDFORBIDFLAG
     65 };
     66 
     67 /// Flags for <compoptions>; each flag is a separate bit.
     68 enum {
     69  COMP_CHECKDUP    = 1,  ///< CHECKCOMPOUNDDUP
     70  COMP_CHECKREP    = 2,  ///< CHECKCOMPOUNDREP
     71  COMP_CHECKCASE   = 4,  ///< CHECKCOMPOUNDCASE
     72  COMP_CHECKTRIPLE = 8,  ///< CHECKCOMPOUNDTRIPLE
     73 };
     74 
     75 /// Info from "REP", "REPSAL" and "SAL" entries in the ".aff" file, used in
     76 /// si_rep, si_repsal, sl_rep, and si_sal.  Not for sl_sal!
     77 /// One replacement: from "ft_from" to "ft_to".
     78 typedef struct {
     79  char *ft_from;  ///< text to be replaced
     80  char *ft_to;    ///< text it is replaced with
     81 } fromto_T;
     82 
     83 /// Info from "SAL" entries in the ".aff" file, used in sl_sal.
     84 /// The info is split for quick processing by spell_soundfold().
     85 /// Note that "sm_oneof" and "sm_rules" point into "sm_lead".
     86 typedef struct {
     87  char *sm_lead;    ///< leading letters
     88  int sm_leadlen;   ///< length of "sm_lead"
     89  char *sm_oneof;   ///< letters from () or NULL
     90  char *sm_rules;   ///< rules like ^, $, priority
     91  char *sm_to;      ///< replacement
     92  int *sm_lead_w;   ///< wide character copy of "sm_lead"
     93  int *sm_oneof_w;  ///< wide character copy of "sm_oneof"
     94  int *sm_to_w;     ///< wide character copy of "sm_to"
     95 } salitem_T;
     96 
     97 typedef int salfirst_T;  ///< element type of the "sl_sal_first" table in slang_T
     98 
     99 /// Values for SP_*ERROR are negative; positive values are used by
    100 /// read_cnt_string().
    101 enum {
    102  SP_TRUNCERROR = -1,  ///< spell file truncated error
    103  SP_FORMERROR  = -2,  ///< format error in spell file
    104  SP_OTHERERROR = -3,  ///< other error while reading spell file
    105 };
    106 
    107 /// Structure used to store words and other info for one language, loaded from
    108 /// a .spl file.
    109 /// The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
    110 /// case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
    111 ///
    112 /// The "byts" array stores the possible bytes in each tree node, preceded by
    113 /// the number of possible bytes, sorted on byte value:
    114 ///      <len> <byte1> <byte2> ...
    115 /// The "idxs" array stores the index of the child node corresponding to the
    116 /// byte in "byts".
    117 /// Exception: when the byte is zero, the word may end here and "idxs" holds
    118 /// the flags, region mask and affixID for the word.  There may be several
    119 /// zeros in sequence for alternative flag/region/affixID combinations.
    120 typedef struct slang_S slang_T;
    121 
    122 struct slang_S {
    123  slang_T *sl_next;   ///< next language
    124  char *sl_name;      ///< language name "en", "en.rare", "nl", etc.
    125  char *sl_fname;     ///< name of .spl file
    126  bool sl_add;        ///< true if it's a .add file.
    127 
    128  uint8_t *sl_fbyts;  ///< case-folded word bytes
    129  int sl_fbyts_len;   ///< length of sl_fbyts
    130  idx_T *sl_fidxs;    ///< case-folded word indexes
    131  uint8_t *sl_kbyts;  ///< keep-case word bytes
    132  idx_T *sl_kidxs;    ///< keep-case word indexes
    133  uint8_t *sl_pbyts;  ///< prefix tree word bytes
    134  idx_T *sl_pidxs;    ///< prefix tree word indexes
    135 
    136  char *sl_info;      ///< infotext string or NULL
    137 
    138  /// table with up to MAXREGIONS two-byte region names plus NUL
    139  char sl_regions[MAXREGIONS * 2 + 1];
    140 
    141  char *sl_midword;              ///< MIDWORD string or NULL
    142 
    143  hashtab_T sl_wordcount;        ///< hashtable with word count, wordcount_T
    144 
    145  int sl_compmax;                ///< COMPOUNDWORDMAX (default: MAXWLEN)
    146  int sl_compminlen;             ///< COMPOUNDMIN (default: 0)
    147  int sl_compsylmax;             ///< COMPOUNDSYLMAX (default: MAXWLEN)
    148  int sl_compoptions;            ///< COMP_* flags
    149  garray_T sl_comppat;           ///< CHECKCOMPOUNDPATTERN items
    150  regprog_T *sl_compprog;        ///< COMPOUNDRULE turned into a regexp program
    151                                 ///< (NULL when no compounding)
    152  uint8_t *sl_comprules;         ///< all COMPOUNDRULE concatenated (or NULL)
    153  uint8_t *sl_compstartflags;    ///< flags for first compound word
    154  uint8_t *sl_compallflags;      ///< all flags for compound words
    155  bool sl_nobreak;               ///< When true: no spaces between words
    156  char *sl_syllable;             ///< SYLLABLE repeatable chars or NULL
    157  garray_T sl_syl_items;         ///< syllable items
    158 
    159  int sl_prefixcnt;              ///< number of items in "sl_prefprog"
    160  regprog_T **sl_prefprog;       ///< table with regprogs for prefixes
    161 
    162  garray_T sl_rep;               ///< list of fromto_T entries from REP lines
    163  int16_t sl_rep_first[256];     ///< indexes where byte first appears, -1 if there is none
    164  garray_T sl_sal;               ///< list of salitem_T entries from SAL lines
    165  salfirst_T sl_sal_first[256];  ///< indexes where byte first appears, -1 if there is none
    166  bool sl_followup;              ///< SAL followup
    167  bool sl_collapse;              ///< SAL collapse_result
    168  bool sl_rem_accents;           ///< SAL remove_accents
    169  bool sl_sofo;                  ///< SOFOFROM and SOFOTO instead of SAL items:
    170                                 ///< "sl_sal_first" maps chars
    171                                 ///< "sl_sal" is a list of wide char lists.
    172  garray_T sl_repsal;            ///< list of fromto_T entries from REPSAL lines
    173  int16_t sl_repsal_first[256];  ///< same as "sl_rep_first", but for REPSAL lines
    174  bool sl_nosplitsugs;           ///< don't suggest splitting a word
    175  bool sl_nocompoundsugs;        ///< don't suggest compounding
    176 
    177  // Info from the .sug file.  Loaded on demand.
    178  time_t sl_sugtime;       ///< timestamp for .sug file
    179  uint8_t *sl_sbyts;       ///< soundfolded word bytes
    180  idx_T *sl_sidxs;         ///< soundfolded word indexes
    181  buf_T *sl_sugbuf;        ///< buffer with word number table
    182  bool sl_sugloaded;       ///< true when .sug file was loaded or failed to load
    183 
    184  bool sl_has_map;         ///< true, if there is a MAP line
    185  hashtab_T sl_map_hash;   ///< MAP for multi-byte chars
    186  int sl_map_array[256];   ///< MAP for first 256 chars
    187  hashtab_T sl_sounddone;  ///< table with soundfolded words that have
    188                           ///< been handled, see add_sound_suggest()
    189 };
    190 
    191 /// Structure used in "b_langp", filled from 'spelllang'.
    192 typedef struct {
    193  slang_T *lp_slang;    ///< info for this language
    194  slang_T *lp_sallang;  ///< language used for sound folding, or NULL
    195  slang_T *lp_replang;  ///< language used for REP items, or NULL
    196  int lp_region;        ///< bitmask for region, or REGION_ALL
    197 } langp_T;
    198 
    199 #define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))  // pointer to item "i", treating (ga).ga_data as a langp_T array
    200 
    201 #define VIMSUGMAGIC "VIMsug"    // string at start of Vim .sug file
    202 #define VIMSUGMAGICL 6          // length of VIMSUGMAGIC, not counting the NUL
    203 #define VIMSUGVERSION 1         // NOTE(review): presumably the .sug format version; confirm at use sites
    204 
    205 enum { REGION_ALL = 0xff, };  ///< region mask value: word valid in all regions
    206 
    207 /// The tables used for recognizing word characters according to spelling.
    208 /// These are only used for the first 256 characters of 'encoding'.
    209 typedef struct {
    210  bool st_isw[256];       ///< flags: is word char
    211  bool st_isu[256];       ///< flags: is uppercase char
    212  uint8_t st_fold[256];   ///< chars: folded case
    213  uint8_t st_upper[256];  ///< chars: upper case
    214 } spelltab_T;
    215 
    216 /// Values for the "what" argument of spell_add_word().
    217 typedef enum {
    218  SPELL_ADD_GOOD = 0,  ///< NOTE(review): presumably "add as a good word"; confirm in spell_add_word()
    219  SPELL_ADD_BAD = 1,   ///< NOTE(review): presumably "add as a bad word"; confirm in spell_add_word()
    220  SPELL_ADD_RARE = 2,  ///< NOTE(review): presumably "add as a rare word"; confirm in spell_add_word()
    221 } SpellAddType;
    222 
    223 typedef struct {
    224  uint16_t wc_count;  ///< number of times the word was seen
    225  char wc_word[];     ///< the word itself (flexible array member)
    226 } wordcount_T;
    227 
    228 #define WC_KEY_OFF   offsetof(wordcount_T, wc_word)  // byte offset of wc_word in wordcount_T; NOTE(review): offsetof needs <stddef.h>, not among the includes visible here
    229 #define HI2WC(hi)    ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))  // steps back from (hi)->hi_key by WC_KEY_OFF to get the wordcount_T pointer
    230 enum { MAXWORDCOUNT = 0xffff, };  ///< equals the largest value the uint16_t "wc_count" can hold