mbyte_defs.h (2151B)
1 #pragma once 2 3 #include <stdbool.h> 4 #include <stdint.h> 5 #include <utf8proc.h> 6 7 #include "nvim/iconv_defs.h" 8 9 enum { 10 /// Maximum number of bytes in a multi-byte character. It can be one 32-bit 11 /// character of up to 6 bytes, or one 16-bit character of up to three bytes 12 /// plus six following composing characters of three bytes each. 13 MB_MAXBYTES = 21, 14 /// Maximum length of a Unicode character, excluding composing characters. 15 MB_MAXCHAR = 6, 16 }; 17 18 /// properties used in enc_canon_table[] (first three mutually exclusive) 19 enum { 20 ENC_8BIT = 0x01, 21 ENC_DBCS = 0x02, 22 ENC_UNICODE = 0x04, 23 24 ENC_ENDIAN_B = 0x10, ///< Unicode: Big endian 25 ENC_ENDIAN_L = 0x20, ///< Unicode: Little endian 26 27 ENC_2BYTE = 0x40, ///< Unicode: UCS-2 28 ENC_4BYTE = 0x80, ///< Unicode: UCS-4 29 ENC_2WORD = 0x100, ///< Unicode: UTF-16 30 31 ENC_LATIN1 = 0x200, ///< Latin1 32 ENC_LATIN9 = 0x400, ///< Latin9 33 ENC_MACROMAN = 0x800, ///< Mac Roman (not Macro Man! :-) 34 }; 35 36 /// Flags for vimconv_T 37 typedef enum { 38 CONV_NONE = 0, 39 CONV_TO_UTF8 = 1, 40 CONV_9_TO_UTF8 = 2, 41 CONV_TO_LATIN1 = 3, 42 CONV_TO_LATIN9 = 4, 43 CONV_ICONV = 5, 44 } ConvFlags; 45 46 #define MBYTE_NONE_CONV { \ 47 .vc_type = CONV_NONE, \ 48 .vc_factor = 1, \ 49 .vc_fail = false, \ 50 } 51 52 /// Structure used for string conversions 53 typedef struct { 54 int vc_type; ///< Zero or more ConvFlags. 55 int vc_factor; ///< Maximal expansion factor. 56 iconv_t vc_fd; ///< Value for CONV_ICONV. 57 bool vc_fail; ///< What to do with invalid characters: if true, fail, 58 ///< otherwise use '?'. 59 } vimconv_T; 60 61 typedef struct { 62 int32_t value; ///< Code point. 63 int len; ///< Length in bytes. 64 } CharInfo; 65 66 typedef struct { 67 char *ptr; ///< Pointer to the first byte of the character. 68 CharInfo chr; ///< Information about the character. 69 } StrCharInfo; 70 71 typedef struct { 72 int8_t begin_off; ///< Offset to the first byte of the codepoint. 73 int8_t end_off; ///< Offset to one past the end byte of the codepoint. 74 } CharBoundsOff; 75 76 typedef utf8proc_int32_t GraphemeState; 77 78 enum { UNICODE_INVALID = 0xFFFD, };