c_grammar.lua (10140B)
-- lpeg grammar for building api metadata from a set of header files. It
-- ignores comments and preprocessor commands and parses a very small subset
-- of C prototypes with a limited set of types

---@diagnostic disable: missing-fields

--- @class nvim.c_grammar.Proto
--- @field [1] 'proto'
--- @field pos integer
--- @field endpos integer
--- @field name string
--- @field return_type string
--- @field parameters [string, string][]
---
--- Decl modifiers
---
--- @field static true?
--- @field inline true?
---
--- Attributes
---
--- @field since integer?
--- @field deprecated_since integer?
--- @field fast true?
--- @field ret_alloc true?
--- @field noexport true?
--- @field remote_only true?
--- @field lua_only true?
--- @field textlock_allow_cmdwin true?
--- @field textlock true?
--- @field remote_impl true?
--- @field compositor_impl true?
--- @field client_impl true?
--- @field client_ignore true?

--- @class nvim.c_grammar.Preproc
--- @field [1] 'preproc'
--- @field content string

--- @class nvim.c_grammar.Keyset.Field
--- @field type string
--- @field name string
--- @field dict_key? string

--- @class nvim.c_grammar.Keyset
--- @field [1] 'typedef'
--- @field keyset_name string
--- @field fields nvim.c_grammar.Keyset.Field[]

--- @class nvim.c_grammar.Empty
--- @field [1] 'empty'

--- @alias nvim.c_grammar.result
--- | nvim.c_grammar.Proto
--- | nvim.c_grammar.Preproc
--- | nvim.c_grammar.Empty
--- | nvim.c_grammar.Keyset

--- @class nvim.c_grammar
--- @field match fun(self, input: string): nvim.c_grammar.result[]

local lpeg = vim.lpeg

local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local C, Ct, Cc, Cg, Cp = lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cg, lpeg.Cp

--- Zero or more repetitions of `pat`.
--- @param pat vim.lpeg.Pattern
local function rep(pat)
  return pat ^ 0
end

--- One or more repetitions of `pat`.
--- @param pat vim.lpeg.Pattern
local function rep1(pat)
  return pat ^ 1
end

--- Zero or one occurrence of `pat`.
--- @param pat vim.lpeg.Pattern
local function opt(pat)
  return pat ^ -1
end

local any = P(1)
local letter = R('az', 'AZ') + S('_$')
local num = R('09')
local alpha = letter + num
local nl = P('\r\n') + P('\n')
local space = S(' \t')
-- Double-quoted string literal; a backslash escapes the following character.
local str = P('"') * rep((P('\\') * any) + (1 - P('"'))) * P('"')
-- Character literal: either a backslash escape of one character ('\n', '\\',
-- '\'') or a single character other than the quote. Without the escape branch
-- `'\''` would be read as `'\'` followed by a stray quote, which can unbalance
-- `braces` below. Longer escapes (e.g. '\x1b') do not match here and are
-- consumed one character at a time by the generic branch of `braces`.
local char = P("'") * ((P('\\') * any) + (any - P("'"))) * P("'")
local ws = space + nl
local wb = #-alpha -- word boundary
local id = letter * rep(alpha)

local comment_inline = P('/*') * rep(1 - P('*/')) * P('*/')
-- Line comment; must be terminated by a newline.
local comment = P('//') * rep(1 - nl) * nl
-- Preprocessor line: captured as { 'preproc', content = ... } where `content`
-- is matched by the rest of the line after '#', newline included.
local preproc = Ct(Cc('preproc') * P('#') * Cg(rep(1 - nl) * nl, 'content'))

-- Anything that may appear between tokens.
local fill = rep(ws + comment_inline + comment + preproc)

--- Matches the keyword `s` followed by a word boundary, with surrounding fill.
--- @param s string
--- @return vim.lpeg.Pattern
local function word(s)
  return fill * P(s) * wb * fill
end

--- One or more `x`, separated by commas (fill allowed around each comma).
--- @param x vim.lpeg.Pattern
local function comma1(x)
  return x * rep(fill * P(',') * fill * x)
end

--- Matches the literal `v` with surrounding fill.
--- @param v string
local function Pf(v)
  return fill * P(v) * fill
end

--- Matches `x` wrapped in parentheses (fill allowed inside the parentheses).
--- @param x vim.lpeg.Pattern
local function paren(x)
  return P('(') * fill * x * fill * P(')')
end

-- '///' doc comment line; the text after '///' is captured as { comment = ... }.
local cdoc_comment = P('///') * opt(Ct(Cg(rep(space) * rep(1 - nl), 'comment')))

-- Balanced '{' ... '}' block. Comments, preprocessor lines, strings and char
-- literals are consumed as units so that braces inside them are not counted.
local braces = P({
  'S',
  A = comment_inline + comment + preproc + str + char + (any - S('{}')),
  S = P('{') * rep(V('A')) * rep(V('S') + V('A')) * P('}'),
})

--- @alias nvim.c_grammar.Container.Union ['Union', string[]]
--- @alias nvim.c_grammar.Container.Tuple ['Tuple', string[]]
--- @alias nvim.c_grammar.Container.Enum ['Enum', string[]]
--- @alias nvim.c_grammar.Container.ArrayOf ['ArrayOf', string, integer?]
--- @alias nvim.c_grammar.Container.DictOf ['DictOf', string]
--- @alias nvim.c_grammar.Container.LuaRefOf ['LuaRefOf', [string, string][], string]
--- @alias nvim.c_grammar.Container.Dict ['Dict', string]
--- @alias nvim.c_grammar.Container.DictAs ['DictAs', string]

--- @alias nvim.c_grammar.Container
--- | nvim.c_grammar.Container.Union
--- | nvim.c_grammar.Container.Tuple
--- | nvim.c_grammar.Container.Enum
--- | nvim.c_grammar.Container.ArrayOf
--- | nvim.c_grammar.Container.DictOf
--- | nvim.c_grammar.Container.LuaRefOf
--- | nvim.c_grammar.Container.Dict
--- | nvim.c_grammar.Container.DictAs

-- Container type macros such as `ArrayOf(Integer, 2)` or `Dict(name)`, each
-- optionally prefixed with '*'. The alternatives are tried in order, so
-- 'DictOf' and 'Dict' are attempted before 'DictAs'; 'Dict' simply fails on
-- "DictAs(" because no '(' follows and the next alternative is tried.
-- stylua: ignore start
local typed_container = P({
  'S',
  S = Ct(
    Cg(opt(P('*')) * P('Union')) * paren(Ct(comma1(V('TY'))))
    + Cg(opt(P('*')) * P('Enum')) * paren(Ct(comma1(Cg(str))))
    + Cg(opt(P('*')) * P('Tuple')) * paren(Ct(comma1(V('TY'))))
    + Cg(opt(P('*')) * P('ArrayOf')) * paren(V('TY') * opt(P(',') * fill * C(rep1(num))))
    + Cg(opt(P('*')) * P('DictOf')) * paren(V('TY'))
    + Cg(opt(P('*')) * P('LuaRefOf'))
      * paren(
        Ct(paren(comma1(Ct((V('TY') + C(str)) * rep1(ws) * Cg(V('ID'))))))
        * opt(P(',') * fill * V('TY'))
      )
    + Cg(opt(P('*')) * P('Dict')) * paren(C(id))
    + Cg(opt(P('*')) * P('DictAs')) * paren(C(id))
    + Cg(opt(P('*')) * P('Set')) * paren(C(id))
    + Cg(opt(P('*')) * P('PMap')) * paren(C(id))
  ),
  -- Remove captures here (with / 0 ) as api_types will recursively parse the type.
  TY = Cg(V('S') / 0 + V('ID')),
  ID = opt(P('*')) * id,
})
-- stylua: ignore end

local ptr_mod = word('restrict') + word('__restrict') + word('const')
-- Any number of '*', each optionally followed by a pointer qualifier.
local opt_ptr = rep(Pf('*') * opt(ptr_mod))

--- Flag attribute: matching `name` sets field `var` to `true`.
--- @param name string
--- @param var string
--- @return vim.lpeg.Pattern
local function attr(name, var)
  return Cg((P(name) * Cc(true)), var)
end

--- Numeric attribute: `name(<digits>)` stores the digit string in field `var`.
--- @param name string
--- @param var string
--- @return vim.lpeg.Pattern
local function attr_num(name, var)
  return Cg((P(name) * paren(C(rep1(num)))), var)
end

-- Function attributes. `attr` has no word-boundary check, so a name that is a
-- prefix of another must come after the longer one
-- (FUNC_API_TEXTLOCK_ALLOW_CMDWIN before FUNC_API_TEXTLOCK).
local fattr = (
  attr_num('FUNC_API_SINCE', 'since')
  + attr_num('FUNC_API_DEPRECATED_SINCE', 'deprecated_since')
  + attr('FUNC_API_FAST', 'fast')
  + attr('FUNC_API_RET_ALLOC', 'ret_alloc')
  + attr('FUNC_API_NOEXPORT', 'noexport')
  + attr('FUNC_API_REMOTE_ONLY', 'remote_only')
  + attr('FUNC_API_LUA_ONLY', 'lua_only')
  + attr('FUNC_API_TEXTLOCK_ALLOW_CMDWIN', 'textlock_allow_cmdwin')
  + attr('FUNC_API_TEXTLOCK', 'textlock')
  + attr('FUNC_API_REMOTE_IMPL', 'remote_impl')
  + attr('FUNC_API_COMPOSITOR_IMPL', 'compositor_impl')
  + attr('FUNC_API_CLIENT_IMPL', 'client_impl')
  + attr('FUNC_API_CLIENT_IGNORE', 'client_ignore')
  -- Any other FUNC_* macro, with an optional argument list, is matched and
  -- ignored. The argument list is "everything up to the next ')'".
  + (P('FUNC_') * rep(alpha) * opt(fill * paren(rep(1 - P(')')))))
)

local void = P('void') * wb

-- Parameter types that are reported under a fixed lowercase tag instead of
-- their C spelling.
local api_param_type = (
  (word('Error') * opt_ptr * Cc('error'))
  + (word('Arena') * opt_ptr * Cc('arena'))
  + (word('lua_State') * opt_ptr * Cc('lstate'))
)

-- A C type, captured as the matched source text.
local ctype = C(
  opt(word('const'))
    * (
      typed_container / 0
      -- 'unsigned' is a type modifier, and a type itself
      + (word('unsigned char') + word('unsigned'))
      + (word('struct') * fill * id)
      + id
    )
    * opt(word('const'))
    * opt_ptr
)

local return_type = (C(void) * fill) + ctype

-- Parameter list: either a lone `void`, or a comma-separated list of
-- `{ type, name }` entries, optionally followed by `, ...`.
-- stylua: ignore start
local params = Ct(
  (void * #P(')'))
  + comma1(Ct(
      (api_param_type + ctype)
      * fill
      * C(id)
      * rep(Pf('[') * rep(alpha) * Pf(']'))
      * rep(fill * fattr)
    ))
    * opt(Pf(',') * P('...'))
)
-- stylua: ignore end

-- Fallback: consume one non-empty line that nothing else recognised.
local ignore_line = rep1(1 - nl) * nl
local empty_line = Ct(Cc('empty') * nl * nl)

local proto_name = opt_ptr * fill * id

-- __inline is used in MSVC
local decl_mod = (
  Cg(word('static') * Cc(true), 'static')
  + Cg((word('inline') + word('__inline')) * Cc(true), 'inline')
)

-- Function prototype or definition. It must start with an identifier
-- character and must not start with 'typedef'.
local proto = Ct(
  Cg(Cp(), 'pos')
    * Cc('proto')
    * -#P('typedef')
    * #alpha
    * opt(P('DLLEXPORT') * rep1(ws))
    * rep(decl_mod)
    * Cg(return_type, 'return_type')
    * fill
    * Cg(proto_name, 'name')
    * fill
    * paren(Cg(params, 'parameters'))
    -- Default value; a FUNC_API_FAST attribute matched below assigns the same
    -- field again with `true`.
    * Cg(Cc(false), 'fast')
    * rep(fill * fattr)
    * Cg(Cp(), 'endpos')
    * (fill * (S(';') + braces))
)

-- One `type name [DictKey(key)];` member of a keyset struct.
local keyset_field = Ct(
  Cg(ctype, 'type')
    * fill
    * Cg(id, 'name')
    * fill
    * opt(P('DictKey') * paren(Cg(rep1(1 - P(')')), 'dict_key')))
    * Pf(';')
)

-- `typedef struct { <fields> } Dict(<keyset_name>);`
local keyset = Ct(
  P('typedef')
    * word('struct')
    * Pf('{')
    * Cg(Ct(rep1(keyset_field)), 'fields')
    * Pf('}')
    * P('Dict')
    * paren(Cg(id, 'keyset_name'))
    * Pf(';')
)

-- Top-level grammar: a list of results. `ignore_line` is the last alternative,
-- so it only consumes lines that no other alternative matched.
local grammar =
  Ct(rep1(empty_line + proto + cdoc_comment + comment + preproc + ws + keyset + ignore_line))

-- Self-test, run when '--test' is the first script argument. `arg` is only
-- defined when the file is executed as a script, so guard against it being nil
-- when the module is loaded with `require`.
if arg and arg[1] == '--test' then
  for i, t in ipairs({
    'void multiqueue_put_event(MultiQueue *self, Event event) {} ',
    'void *xmalloc(size_t size) {} ',
    {
      'struct tm *os_localtime_r(const time_t *restrict clock,',
      ' struct tm *restrict result) FUNC_ATTR_NONNULL_ALL {}',
    },
    {
      '_Bool',
      '# 163 "src/nvim/event/multiqueue.c"',
      ' multiqueue_empty(MultiQueue *self)',
      '{}',
    },
    'const char *find_option_end(const char *arg, OptIndex *opt_idxp) {}',
    'bool semsg(const char *const fmt, ...) {}',
    'int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len) {}',
    'void ex_argdedupe(exarg_T *eap FUNC_ATTR_UNUSED) {}',
    'static TermKeySym register_c0(TermKey *tk, TermKeySym sym, unsigned char ctrl, const char *name) {}',
    'unsigned get_bkc_flags(buf_T *buf) {}',
    'char *xstpcpy(char *restrict dst, const char *restrict src) {}',
    'bool try_leave(const TryState *const tstate, Error *const err) {}',
    'void api_set_error(ErrorType errType) {}',
    {
      'void nvim_subscribe(uint64_t channel_id, String event)',
      'FUNC_API_SINCE(1) FUNC_API_DEPRECATED_SINCE(13) FUNC_API_REMOTE_ONLY',
      '{}',
    },

    -- Do not consume leading preproc statements
    {
      '#line 1 "D:/a/neovim/neovim/src\\nvim/mark.h"',
      'static __inline int mark_global_index(const char name)',
      ' FUNC_ATTR_CONST',
      '{}',
    },
    {
      '',
      '#line 1 "D:/a/neovim/neovim/src\\nvim/mark.h"',
      'static __inline int mark_global_index(const char name)',
      '{}',
    },
    {
      'size_t xstrlcpy(char *__restrict dst, const char *__restrict src, size_t dsize)',
      ' FUNC_ATTR_NONNULL_ALL',
      ' {}',
    },
  }) do
    -- Multi-line cases are given as a list of lines.
    if type(t) == 'table' then
      t = table.concat(t, '\n') .. '\n'
    end
    -- Collapse runs of spaces before matching.
    t = t:gsub(' +', ' ')
    local r = grammar:match(t)
    if not r then
      print('Test ' .. i .. ' failed')
      print(' |' .. table.concat(vim.split(t, '\n'), '\n |'))
    end
  end
end

return {
  grammar = grammar --[[@as nvim.c_grammar]],
  typed_container = typed_container,
}