languagetree.lua (43008B)
--- @brief A [LanguageTree]() contains a tree of parsers: the root treesitter parser for {lang} and
--- any "injected" language parsers, which themselves may inject other languages, recursively.
--- For example a Lua buffer containing some Vimscript commands needs multiple parsers to fully
--- understand its contents.
---
--- To create a LanguageTree (parser object) for a given buffer and language, use:
---
--- ```lua
--- local parser = vim.treesitter.get_parser(bufnr, lang)
--- ```
---
--- (where `bufnr=0` means current buffer). `lang` defaults to 'filetype'.
--- Note: currently the parser is retained for the lifetime of a buffer but this may change;
--- a plugin should keep a reference to the parser object if it wants incremental updates.
---
--- Whenever you need to access the current syntax tree, parse the buffer:
---
--- ```lua
--- local tree = parser:parse({ start_row, end_row })
--- ```
---
--- This returns a table of immutable |treesitter-tree| objects representing the current state of
--- the buffer. When the plugin wants to access the state after a (possible) edit it must call
--- `parse()` again. If the buffer wasn't edited, the same tree will be returned again without extra
--- work. If the buffer was parsed before, incremental parsing will be done of the changed parts.
---
--- Note: To use the parser directly inside a |nvim_buf_attach()| Lua callback, you must call
--- |vim.treesitter.get_parser()| before you register your callback. But preferably parsing
--- shouldn't be done directly in the change callback anyway as they will be very frequent. Rather
--- a plugin that does any kind of analysis on a tree should use a timer to throttle too frequent
--- updates.
---

-- Debugging:
--
-- vim.g.__ts_debug levels:
--    - 1. Messages from languagetree.lua
--    - 2. Parse messages from treesitter
--    - 3. Lex messages from treesitter
--
-- Log file can be found in stdpath('log')/treesitter.log

local query = require('vim.treesitter.query')
local language = require('vim.treesitter.language')
local Range = require('vim.treesitter._range')
local hrtime = vim.uv.hrtime

-- Parse in 3ms chunks.
local default_parse_timeout_ns = 3 * 1000000

---@type Range2[]
local entire_document_range = {
  {
    0,
    math.huge --[[@as integer]],
  },
}

---@alias TSCallbackName
---| 'changedtree'
---| 'bytes'
---| 'detach'
---| 'child_added'
---| 'child_removed'

---@alias TSCallbackNameOn
---| 'on_changedtree'
---| 'on_bytes'
---| 'on_detach'
---| 'on_child_added'
---| 'on_child_removed'

---@alias ParserThreadState { timeout: integer? }

--- Maps the `on_*` option names to the internal callback names.
--- @type table<TSCallbackNameOn,TSCallbackName>
local TSCallbackNames = {
  on_changedtree = 'changedtree',
  on_bytes = 'bytes',
  on_detach = 'detach',
  on_child_added = 'child_added',
  on_child_removed = 'child_removed',
}

---@nodoc
---@class vim.treesitter.LanguageTree
---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
---@field private _injection_query vim.treesitter.Query Queries defining injected languages
---@field private _processed_injection_region Range[]? Range for which injections have been processed
---@field private _opts table Options
---@field private _parser TSParser Parser for language
---Table of regions for which the tree is currently running an async parse
---@field private _ranges_being_parsed table<string, boolean>
---Table of callback queues, keyed by each region for which the callbacks should be run
---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]>
---List of regions this tree should manage and parse. If nil, included_regions() falls back to a
---single empty region (i.e. the whole source).
---@field private _regions table<integer, Range6[]>?
---The total number of regions. Since _regions can have holes, we cannot simply read this value from #_regions.
---@field private _num_regions integer
---@field private _lang string Language name
---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
---@field private _source (integer|string) Buffer or string to parse
---Reference to parsed trees (one for each region).
---Each key is the index of region, which is synced with _regions and _valid_regions.
---@field private _trees table<integer, TSTree>
---@field private _valid_regions table<integer,true> Set of valid region IDs.
---@field private _num_valid_regions integer Number of valid regions
---@field private _is_entirely_valid boolean Whether the entire tree (excluding children) is valid.
---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file*
local LanguageTree = {}

---Optional arguments:
---@class vim.treesitter.LanguageTree.new.Opts
---@inlinedoc
---@field queries? table<string,string>  -- Deprecated
---@field injections? table<string,string>

LanguageTree.__index = LanguageTree

--- @nodoc
---
--- LanguageTree contains a tree of parsers: the root treesitter parser for {lang} and any
--- "injected" language parsers, which themselves may inject other languages, recursively.
---
---@param source (integer|string) Buffer or text string to parse
---@param lang string Root language of this tree
---@param opts vim.treesitter.LanguageTree.new.Opts?
---@return vim.treesitter.LanguageTree parser object
function LanguageTree.new(source, lang, opts)
  -- language.add() reports failure through a falsy return value, hence the assert.
  assert(language.add(lang))
  opts = opts or {}

  -- Buffer 0 is shorthand for the current buffer; resolve it to a concrete handle.
  if source == 0 then
    source = vim.api.nvim_get_current_buf()
  end

  local injections = opts.injections or {}

  --- @class vim.treesitter.LanguageTree
  local self = {
    _source = source,
    _lang = lang,
    _children = {},
    _trees = {},
    _opts = opts,
    -- A caller-supplied injection query for this language takes precedence over the runtime one.
    _injection_query = injections[lang] and query.parse(lang, injections[lang])
      or query.get(lang, 'injections'),
    _processed_injection_region = nil,
    _valid_regions = {},
    _num_valid_regions = 0,
    _num_regions = 1,
    _is_entirely_valid = false,
    _parser = vim._create_ts_parser(lang),
    _ranges_being_parsed = {},
    _cb_queues = {},
    _callbacks = {},
    _callbacks_rec = {},
  }

  setmetatable(self, LanguageTree)

  if vim.g.__ts_debug and type(vim.g.__ts_debug) == 'number' then
    self:_set_logger()
    self:_log('START')
  end

  -- Every known callback name gets its own (initially empty) handler list.
  for _, name in pairs(TSCallbackNames) do
    self._callbacks[name] = {}
    self._callbacks_rec[name] = {}
  end

  return self
end

--- @private
--- Opens `treesitter.log` inside `stdpath('log')` in append mode and installs a logger that writes
--- to it, both on this tree (`self._logger`) and on the underlying parser.
--- Raises an error if the log file cannot be opened.
function LanguageTree:_set_logger()
  local source = self:source()
  -- String sources are logged as 'text'; buffers are logged by their number.
  source = type(source) == 'string' and 'text' or tostring(source)

  local lang = self:lang()

  local logdir = vim.fn.stdpath('log') --[[@as string]]

  vim.fn.mkdir(logdir, 'p')
  local logfilename = vim.fs.joinpath(logdir, 'treesitter.log')

  local logfile, openerr = io.open(logfilename, 'a+')

  if not logfile or openerr then
    -- error() does not return, so nothing may follow it in this branch.
    error(string.format('Could not open file (%s) for logging: %s', logfilename, openerr))
  end

  self._logfile = logfile

  self._logger = function(logtype, msg)
    self._logfile:write(string.format('%s:%s:(%s) %s\n', source, lang, logtype, msg))
    self._logfile:flush()
  end

  local log_lex = vim.g.__ts_debug >= 3
  local log_parse = vim.g.__ts_debug >= 2
  self._parser:_set_logger(log_lex, log_parse, self._logger)
end

---Prepends the time elapsed since {start} to the values passed in.
---The values travel as varargs rather than through a table so that `nil` results (including
---trailing ones) are forwarded unchanged.
---@param start number Timestamp previously obtained from `hrtime()`
---@param ... any
---@return number, ...
local function with_duration(start, ...)
  return hrtime() - start, ...
end

---Measure execution time of a function, in nanoseconds.
---All values returned by {f} are passed through after the duration.
---@generic R1, R2, R3
---@param f fun(): R1, R2, R3
---@return number, R1, R2, R3
local function tcall(f, ...)
  local start = hrtime()
  -- f(...) is fully evaluated before with_duration() runs, so the duration covers exactly the call.
  ---@diagnostic disable-next-line
  return with_duration(start, f(...))
end

---@private
---Writes a debug message to the log, prefixed with the calling function's name, its current line
---and the number of regions. Does nothing unless a logger is installed and `vim.g.__ts_debug` is
---at least 1.
---@param ... any Values to log. If the first value is a function, it is called and its results
---are logged instead, so expensive messages are only built when logging is active.
function LanguageTree:_log(...)
  if not self._logger then
    return
  end

  if not vim.g.__ts_debug or vim.g.__ts_debug < 1 then
    return
  end

  local args = { ... }
  if type(args[1]) == 'function' then
    args = { args[1]() }
  end

  -- Level 2 is the function that called _log().
  local info = debug.getinfo(2, 'nl')
  local nregions = vim.tbl_count(self:included_regions())
  local prefix =
    string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions)

  local msg = { prefix }
  for _, x in ipairs(args) do
    if type(x) == 'string' then
      msg[#msg + 1] = x
    else
      msg[#msg + 1] = vim.inspect(x, { newline = ' ', indent = '' })
    end
  end
  self._logger('nvim', table.concat(msg, ' '))
end

--- Invalidates this parser and its children.
---
--- Should only be called when the tracked state of the LanguageTree is not valid against the parse
--- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
---@param reload boolean|nil
function LanguageTree:invalidate(reload)
  -- Forget which regions were valid and reset the parser.
  self._is_entirely_valid = false
  self._num_valid_regions = 0
  self._valid_regions = {}
  self._parser:reset()

  if reload then
    -- The buffer was reloaded: announce every existing tree as changed, then drop them all so
    -- that everything gets reparsed.
    for _, tree in pairs(self._trees) do
      self:_do_callback('changedtree', tree:included_ranges(true), tree)
    end
    self._trees = {}
  end

  for _, child_tree in pairs(self._children) do
    child_tree:invalidate(reload)
  end
end

--- Returns all trees of the regions parsed by this parser.
--- Does not include child languages.
--- The result is list-like if
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
function LanguageTree:trees()
  return self._trees
end

--- Gets the language of this tree node.
--- @return string
function LanguageTree:lang()
  return self._lang
end

--- Tells whether {region} should be considered for {range}.
--- An empty region always qualifies; otherwise a `nil` range never does, a boolean range is
--- returned as-is, and a range (or list of ranges) qualifies when `Range.intercepts` holds for at
--- least one pair.
--- @param region Range6[]
--- @param range? boolean|Range|Range[]
--- @return boolean
local function intercepts_region(region, range)
  if #region == 0 then
    return true
  end

  local kind = type(range)
  if kind == 'nil' then
    return false
  elseif kind == 'boolean' then
    return range
  end

  -- Treat a single range as a one-element list so both shapes share the same loop.
  ---@type Range[]
  local candidates = type(range[1]) == 'table' and range or { range }

  for _, piece in ipairs(region) do
    for _, candidate in ipairs(candidates) do
      if Range.intercepts(piece, candidate) then
        return true
      end
    end
  end

  return false
end

--- Tells whether every range of {region2} is contained in some range of {region1}.
--- Both lists are walked in step, front to back.
--- @param region1 Range6[]
--- @param region2 Range|Range[]
--- @return boolean
local function contains_region(region1, region2)
  -- Accept a bare range by wrapping it into a list.
  if type(region2[1]) ~= 'table' then
    region2 = { region2 }
  end

  -- TODO: Combine intersection ranges in region1
  local outer_idx, inner_idx = 1, 1
  local outer_len, inner_len = #region1, #region2

  while outer_idx <= outer_len and inner_idx <= inner_len do
    local outer = { Range.unpack4(region1[outer_idx]) }
    local inner = { Range.unpack4(region2[inner_idx]) }

    if Range.contains(outer, inner) then
      -- Covered; move on to the next range that needs covering.
      inner_idx = inner_idx + 1
    elseif Range.cmp_pos.lt(outer[3], outer[4], inner[1], inner[2]) then
      -- `outer` ends before `inner` starts; try the next covering range.
      outer_idx = outer_idx + 1
    else
      return false -- r1 starts after r2 starts and thus can't cover it
    end
  end

  -- Success only if nothing in region2 was left uncovered.
  return inner_idx > inner_len
end

--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
---@param exclude_children boolean? whether to ignore the validity of children (default `false`)
---@param range Range|Range[]?
---   range (or list of ranges, sorted by starting point in ascending order) to check for validity
---@return boolean
function LanguageTree:is_valid(exclude_children, range)
  local valid_regions = self._valid_regions

  if not self._is_entirely_valid then
    -- Without a range the whole tree is being asked about, and it is known not to be valid.
    if not range then
      return false
    end
    -- TODO: Efficiently search for possibly intersecting regions using a binary search
    for i, region in pairs(self:included_regions()) do
      -- An invalid region only matters if it (or the tree last parsed for it) touches {range}.
      if
        not valid_regions[i]
        and (
          intercepts_region(region, range)
          or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
        )
      then
        return false
      end
    end
  end

  if not exclude_children then
    -- Injections must have been processed for the whole requested range (or the whole document).
    if
      not self._processed_injection_region
      or not contains_region(self._processed_injection_region, range or entire_document_range)
    then
      return false
    end

    for _, child in pairs(self._children) do
      if not child:is_valid(exclude_children, range) then
        return false
      end
    end
  end

  return true
end

--- Returns a map of language to child tree.
--- @return table<string,vim.treesitter.LanguageTree>
function LanguageTree:children()
  return self._children
end

--- Returns the source content of the language tree (bufnr or string).
--- @return integer|string
function LanguageTree:source()
  return self._source
end

--- @private
--- Parses every invalid region that is relevant for {range} and records it as valid.
--- When the parser returns no tree, yields `self._trees, false` and retries once resumed, so
--- this is expected to run inside a coroutine whenever `thread_state.timeout` is set.
--- @param range boolean|Range|Range[]?
--- @param thread_state ParserThreadState
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range, thread_state)
  local changes = {}
  local no_regions_parsed = 0
  local total_parse_time = 0

  -- If there are no ranges, set to an empty list
  -- so the included ranges in the parser are cleared.
  for i, ranges in pairs(self:included_regions()) do
    if
      not self._valid_regions[i]
      and (
        intercepts_region(ranges, range)
        or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
      )
    then
      self._parser:set_included_ranges(ranges)

      local parse_time, tree, tree_changes = tcall(
        self._parser.parse,
        self._parser,
        self._trees[i],
        self._source,
        true,
        thread_state.timeout
      )
      while true do
        if tree then
          break
        end
        -- No tree yet (presumably the timeout elapsed): hand control back, then try again.
        coroutine.yield(self._trees, false)

        parse_time, tree, tree_changes = tcall(
          self._parser.parse,
          self._parser,
          self._trees[i],
          self._source,
          true,
          thread_state.timeout
        )
      end

      -- Only the duration of the final parse call is charged and counted below.
      self:_subtract_time(thread_state, parse_time)

      self:_do_callback('changedtree', tree_changes, tree)
      self._trees[i] = tree
      vim.list_extend(changes, tree_changes)

      total_parse_time = total_parse_time + parse_time
      no_regions_parsed = no_regions_parsed + 1
      self._valid_regions[i] = true
      self._num_valid_regions = self._num_valid_regions + 1

      if self._num_valid_regions == self._num_regions then
        self._is_entirely_valid = true
      end
    end
  end

  return changes, no_regions_parsed, total_parse_time
end

--- @private
--- Synchronises the child trees with {injections_by_lang}: children are created on demand and
--- receive their regions, and children whose language no longer appears are removed.
--- @param injections_by_lang table<string, Range6[][]>
function LanguageTree:_add_injections(injections_by_lang)
  local seen_langs = {} ---@type table<string,boolean>

  for lang, injection_regions in pairs(injections_by_lang) do
    -- language.add() may either raise or report failure through a falsy return value (which is
    -- why LanguageTree.new() wraps it in assert()), so both outcomes have to be checked here.
    local ok, added = pcall(language.add, lang)
    local has_lang = ok and added

    -- Child language trees should just be ignored if not found, since
    -- they can depend on the text of a node. Intermediate strings
    -- would cause errors for unknown parsers.
    if has_lang then
      local child = self._children[lang]

      if not child then
        child = self:add_child(lang)
      end

      child:set_included_regions(injection_regions)
      seen_langs[lang] = true
    end
  end

  -- Clearing existing fields while traversing with pairs() is permitted in Lua.
  for lang, _ in pairs(self._children) do
    if not seen_langs[lang] then
      self:remove_child(lang)
    end
  end
end

--- Builds the string used to key async parse state for {range}: `tostring()` for non-tables,
--- comma-joined numbers for a single range, and `|`-joined ranges for a list.
--- @param range boolean|Range|Range[]?
--- @return string
local function range_to_string(range)
  if type(range) ~= 'table' then
    return tostring(range)
  end
  if type(range[1]) ~= 'table' then
    return table.concat(range, ',')
  end
  ---@cast range Range[]
  local str = ''
  for i, r in ipairs(range) do
    if i > 1 then
      str = str .. '|'
    end
    str = str .. table.concat(r, ',')
  end
  return str
end

--- @private
--- Appends {callback} to the queue of callbacks waiting for a parse of {range}.
--- @param range boolean|Range?
--- @param callback fun(err?: string, trees?: table<integer, TSTree>)
function LanguageTree:_push_async_callback(range, callback)
  local key = range_to_string(range)
  self._cb_queues[key] = self._cb_queues[key] or {}
  local queue = self._cb_queues[key]
  queue[#queue + 1] = callback
end

--- @private
--- Invokes every callback queued for {range} with {err} and {trees}, then clears the queue and
--- the "being parsed" marker for that range.
--- @param range boolean|Range?
--- @param err? string
--- @param trees? table<integer, TSTree>
function LanguageTree:_run_async_callbacks(range, err, trees)
  local key = range_to_string(range)
  for _, cb in ipairs(self._cb_queues[key]) do
    cb(err, trees)
  end
  self._ranges_being_parsed[key] = nil
  self._cb_queues[key] = nil
end

--- Run an asynchronous parse, calling {on_parse} when complete.
---
--- @private
--- @param range boolean|Range?
--- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)
--- @return table<integer, TSTree>?
---   trees the list of parsed trees, if parsing completed synchronously
function LanguageTree:_async_parse(range, on_parse)
  self:_push_async_callback(range, on_parse)

  -- A parse for this range is already in flight: the callback queued above is all that is needed.
  local key = range_to_string(range)
  if self._ranges_being_parsed[key] then
    return
  end
  self._ranges_being_parsed[key] = true

  local source = self._source
  local is_buffer_parser = type(source) == 'number'

  -- For buffer sources, remember the changedtick so edits made mid-parse can be detected.
  local buf, last_tick
  if is_buffer_parser then
    buf = vim.b[source]
    last_tick = buf.changedtick
  end

  local elapsed = 0
  -- 'redrawtime' is multiplied by 1e6 so it can be compared with the nanosecond parse times.
  local budget = vim.o.redrawtime * 1000000

  local thread_state = {} ---@type ParserThreadState

  ---@type fun(): table<integer, TSTree>, boolean
  local parse = coroutine.wrap(self._parse)

  -- Runs one slice of the parse and reschedules itself until the parse finishes or times out.
  local function step()
    if is_buffer_parser then
      if
        not vim.api.nvim_buf_is_valid(source --[[@as number]])
      then
        return nil
      end

      -- The buffer changed while parsing was in progress: start over with a fresh coroutine.
      local tick = buf.changedtick
      if tick ~= last_tick then
        last_tick = tick
        elapsed = 0
        parse = coroutine.wrap(self._parse)
      end
    end

    -- Each slice gets the default timeout, unless synchronous parsing is forced.
    if vim.g._ts_force_sync_parsing then
      thread_state.timeout = nil
    else
      thread_state.timeout = default_parse_timeout_ns
    end

    local slice_time, trees, finished = tcall(parse, self, range, thread_state)
    elapsed = elapsed + slice_time

    if finished then
      self:_run_async_callbacks(range, nil, trees)
      return trees
    end

    if elapsed > budget then
      self:_run_async_callbacks(range, 'TIMEOUT', nil)
      return nil
    end

    vim.schedule(step)
  end

  return step()
end

--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be
--- created and parsed.
---
--- Any region with empty range (`{}`, typically only the root tree) is always parsed;
--- otherwise (typically injections) only if it intersects {range} (or if {range} is `true`).
---
--- @param range? boolean|Range|Range[]: Parse this range (or list of ranges, sorted by starting
---     point in ascending order) in the parser's source.
---     Set to `true` to run a complete parse of the source (Note: Can be slow!)
---     Set to `false|nil` to only parse regions with empty ranges (typically
---     only the root tree without injections).
--- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)? Function invoked when parsing completes.
---     When provided and `vim.g._ts_force_sync_parsing` is not set, parsing will run
---     asynchronously. The first argument to the function is a string representing the error type,
---     in case of a failure (currently only possible for timeouts). The second argument is the list
---     of trees returned by the parse (upon success), or `nil` if the parse timed out (determined
---     by 'redrawtime').
---
---     If parsing was still able to finish synchronously (within 3ms), `parse()` returns the list
---     of trees. Otherwise, it returns `nil`.
--- @return table<integer, TSTree>?
function LanguageTree:parse(range, on_parse)
  if on_parse then
    return self:_async_parse(range, on_parse)
  end

  -- Synchronous path: an empty thread state carries no timeout.
  local trees = self:_parse(range, {})
  return trees
end

--- Charges {time} against the remaining timeout (if there is one) and yields
--- `self._trees, false` once the timeout has dropped to zero.
---@param thread_state ParserThreadState
---@param time integer
function LanguageTree:_subtract_time(thread_state, time)
  local remaining = thread_state.timeout
  if remaining then
    remaining = math.max(remaining - time, 0)
  end
  thread_state.timeout = remaining

  if remaining == 0 then
    coroutine.yield(self._trees, false)
  end
end

--- @private
--- Parses this tree's invalid regions, refreshes the injections where needed and then parses all
--- children. Returns immediately if the tree is already valid for {range}.
--- @param range? boolean|Range|Range[]
--- @param thread_state ParserThreadState
--- @return table<integer, TSTree> trees
--- @return boolean finished
function LanguageTree:_parse(range, thread_state)
  -- is_valid() only accepts a range (list); any other value of {range} is passed on as nil.
  local check_range = type(range) == 'table' and range or nil

  if self:is_valid(nil, check_range) then
    self:_log('valid')
    return self._trees, true
  end

  local changes --- @type Range6[]?

  -- Stats for the log entry written further down.
  -- NOTE: query_time is never assigned again in this function, so it is always logged as 0.
  local no_regions_parsed, query_time, total_parse_time = 0, 0, 0

  -- Ignoring the children, at least one of our own regions is invalid.
  if not self:is_valid(true, check_range) then
    changes, no_regions_parsed, total_parse_time = self:_parse_regions(range, thread_state)

    -- Anything that was reparsed makes the previously processed injections stale.
    if no_regions_parsed > 0 then
      self._processed_injection_region = nil
    end
  end

  if range then
    local processed = self._processed_injection_region
    local already_covered = processed
      and contains_region(processed, range ~= true and range or entire_document_range)

    if not already_covered then
      local injections_by_lang = self:_get_injections(range, thread_state)
      local spent = tcall(self._add_injections, self, injections_by_lang)
      self:_subtract_time(thread_state, spent)
    end
  end

  self:_log({
    changes = changes and #changes > 0 and changes or nil,
    regions_parsed = no_regions_parsed,
    parse_time = total_parse_time,
    query_time = query_time,
    range = range,
  })

  for _, child_tree in pairs(self._children) do
    child_tree:_parse(range, thread_state)
  end

  return self._trees, true
end

--- Invokes the callback for each |LanguageTree| recursively.
---
--- Note: This includes the invoking tree's child trees as well.
---
---@param fn fun(tree: TSTree, ltree: vim.treesitter.LanguageTree)
function LanguageTree:for_each_tree(fn)
  -- Own trees first, then descend into the injected languages.
  for _, own_tree in pairs(self._trees) do
    fn(own_tree, self)
  end

  for _, child_ltree in pairs(self._children) do
    child_ltree:for_each_tree(fn)
  end
end

--- Adds a child language to this |LanguageTree|.
---
--- If the language already exists as a child, it will first be removed.
---
---@private
---@param lang string Language to add.
---@return vim.treesitter.LanguageTree injected
function LanguageTree:add_child(lang)
  if self._children[lang] then
    self:remove_child(lang)
  end

  local new_child = LanguageTree.new(self._source, lang, self._opts)

  -- The child starts out with all of this tree's recursive callbacks.
  for cb_name, handlers in pairs(self._callbacks_rec) do
    vim.list_extend(new_child._callbacks_rec[cb_name], handlers)
  end

  new_child._parent = self
  self._children[lang] = new_child
  self:_do_callback('child_added', new_child)

  -- Looked up again after the callbacks have run, rather than returning the local.
  return self._children[lang]
end

---Returns the parent tree. `nil` for the root tree.
---@return vim.treesitter.LanguageTree?
function LanguageTree:parent()
  return self._parent
end

--- Removes a child language from this |LanguageTree|.
---
---@private
---@param lang string Language to remove.
function LanguageTree:remove_child(lang)
  local removed = self._children[lang]
  if not removed then
    return
  end

  -- Detach first, then destroy, then notify listeners.
  self._children[lang] = nil
  removed:destroy()
  self:_do_callback('child_removed', removed)
end

--- Destroys this |LanguageTree| and all its children.
---
--- Any cleanup logic should be performed here.
---
--- Note: This DOES NOT remove this tree from a parent. Instead,
--- `remove_child` must be called on the parent to remove it.
function LanguageTree:destroy()
  -- Nothing of its own to clean up yet; just propagate to the children.
  for _, child_tree in pairs(self._children) do
    child_tree:destroy()
  end
end

--- Formats a region for log output as `[srow:scol-erow:ecol]`, spanning from the start of its
--- first range to the end of its last one (`[]` for an empty region).
---@param region Range6[]
local function region_tostr(region)
  local count = #region
  if count == 0 then
    return '[]'
  end

  local first, last = region[1], region[count]
  return string.format('[%d:%d-%d:%d]', first[1], first[2], last[4], last[5])
end

---@private
---Calls {fn} for every region that is currently marked valid; a falsy result marks that region
---as invalid. Afterwards `_is_entirely_valid` reflects whether all regions are still valid.
---Does nothing when no region is marked valid.
---@param fn fun(index: integer, region: Range6[]): boolean
function LanguageTree:_iter_regions(fn)
  -- Nothing is valid, so there is nothing that could be invalidated.
  if next(self._valid_regions) == nil then
    return
  end

  if self._is_entirely_valid then
    self:_log('was valid')
  end

  local all_valid = true

  for i, region in pairs(self:included_regions()) do
    if self._valid_regions[i] then
      -- Invalid entries are stored as nil (not false) so that _valid_regions only ever holds the
      -- regions that are valid.
      local still_valid = fn(i, region) and true or nil
      self._valid_regions[i] = still_valid

      if not still_valid then
        self._num_valid_regions = self._num_valid_regions - 1
        self:_log(function()
          return 'invalidating region', i, region_tostr(region)
        end)
      end
    end

    if not self._valid_regions[i] then
      all_valid = false
    end
  end

  self._is_entirely_valid = all_valid
end

--- Sets the included regions that should be parsed by this |LanguageTree|.
--- A region is a set of nodes and/or ranges that will be parsed in the same context.
---
--- For example, `{ { node1 }, { node2} }` contains two separate regions.
--- They will be parsed by the parser in two different contexts, thus resulting
--- in two separate trees.
---
--- On the other hand, `{ { node1, node2 } }` is a single region consisting of
--- two nodes. This will be parsed by the parser in a single context, thus resulting
--- in a single tree.
---
--- This allows for embedded languages to be parsed together across different
--- nodes, which is useful for templating languages like ERB and EJS.
---
---@private
---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
function LanguageTree:set_included_regions(new_regions)
  -- Normalise every entry in place to a 6-element range (with byte offsets): nodes are replaced
  -- by their range, 4-element ranges get their byte offsets added.
  for _, region in ipairs(new_regions) do
    for idx, item in ipairs(region) do
      local kind = type(item)
      if kind == 'userdata' then
        --- @diagnostic disable-next-line: missing-fields LuaLS varargs bug
        region[idx] = { item:range(true) }
      elseif kind == 'table' and #item == 4 then
        region[idx] = Range.add_bytes(self._source, item --[[@as Range4]])
      end
    end
  end

  -- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
  -- new_regions is different from included_regions, then outdated regions in included_regions are
  -- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
  -- outdated_regions is invalidated by _iter_regions in the equal-length branch.
  if #self:included_regions() == #new_regions then
    -- Same number of regions: only the ones whose contents differ are invalidated.
    self:_iter_regions(function(i, region)
      return vim.deep_equal(new_regions[i], region)
    end)
  else
    -- TODO(lewis6991): inefficient; invalidate trees incrementally
    for _, old_tree in pairs(self._trees) do
      self:_do_callback('changedtree', old_tree:included_ranges(true), old_tree)
    end
    self._trees = {}
    self:invalidate()
  end

  self._regions = new_regions
  self._num_regions = #new_regions
end

---Gets the set of included regions managed by this LanguageTree. This can be different from the
---regions set by injection query, because a partial |LanguageTree:parse()| drops the regions
---outside the requested range.
---Each list represents a range in the form of
---{ {start_row}, {start_col}, {start_bytes}, {end_row}, {end_col}, {end_bytes} }.
---@return table<integer, Range6[]>
function LanguageTree:included_regions()
  if self._regions then
    return self._regions
  end

  -- No regions were set: fall back to a single region with no ranges.
  -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
  return { {} }
end

---Returns the ranges covered by {node}. With {include_children} (or when the node has no named
---children) this is the node's single range; otherwise the ranges of all named children are cut
---out and only the gaps before, between and after them are returned.
---@param node TSNode
---@param source string|integer
---@param metadata vim.treesitter.query.TSMetadata
---@param include_children boolean
---@return Range6[]
local function get_node_ranges(node, source, metadata, include_children)
  local range = vim.treesitter.get_range(node, source, metadata)
  local child_count = node:named_child_count()

  if include_children or child_count == 0 then
    return { range }
  end

  local ranges = {} ---@type Range6[]

  -- (srow, scol, sbyte) is the start of the gap currently being collected.
  local srow, scol, sbyte, erow, ecol, ebyte = Range.unpack6(range)

  -- We are excluding children so we need to mask out their ranges
  for i = 0, child_count - 1 do
    local child = assert(node:named_child(i))
    local c_srow, c_scol, c_sbyte, c_erow, c_ecol, c_ebyte = child:range(true)
    -- Emit the gap only if the child starts strictly after the cursor. Positions have to be
    -- compared as (row, col) pairs: comparing rows and columns independently would also accept
    -- a child that starts on an earlier row but at a larger column, producing an inverted range.
    if Range.cmp_pos.lt(srow, scol, c_srow, c_scol) then
      ranges[#ranges + 1] = { srow, scol, sbyte, c_srow, c_scol, c_sbyte }
    end
    srow = c_erow
    scol = c_ecol
    sbyte = c_ebyte
  end

  -- Trailing gap between the end of the last child and the end of the node.
  if Range.cmp_pos.lt(srow, scol, erow, ecol) then
    ranges[#ranges + 1] = Range.add_bytes(source, { srow, scol, sbyte, erow, ecol, ebyte })
  end

  return ranges
end

---Finds the intersection between two regions, assuming they are sorted in ascending order by
---starting point.
---@param region1 Range6[]
---@param region2 Range6[]?
---@return Range6[]
local function clip_regions(region1, region2)
  -- Nothing to clip against: the first region is returned as-is (not copied).
  if not region2 then
    return region1
  end

  local result = {}
  local i, j = 1, 1

  -- Two-pointer sweep over both sorted lists.
  while i <= #region1 and j <= #region2 do
    local r1 = region1[i]
    local r2 = region2[j]

    local intersection = Range.intersection(r1, r2)
    if intersection then
      table.insert(result, intersection)
    end

    -- Advance the range that ends earlier
    -- (indices 4 and 5 of a Range6 are the end row and end column)
    if Range.cmp_pos.le(r1[4], r1[5], r2[4], r2[5]) then
      i = i + 1
    else
      j = j + 1
    end
  end

  return result
end

---@nodoc
---@class vim.treesitter.languagetree.InjectionElem
---@field combined boolean
---@field regions Range6[][]

---@alias vim.treesitter.languagetree.Injection table<string,table<integer,vim.treesitter.languagetree.InjectionElem>>

--- Records the {ranges} of one injection match for {lang} in {result}.
---
--- A non-combined injection is appended to `result[lang]` as a region of its own. For a combined
--- injection, all ranges belonging to the same {pattern} are accumulated into one shared region,
--- which is tracked through `t[lang][pattern]`. In both cases the ranges are first clipped to
--- {parent_ranges} (when given).
---
---@param t vim.treesitter.languagetree.Injection
---@param pattern integer
---@param lang string
---@param combined boolean
---@param ranges Range6[]
---@param parent_ranges Range6[]?
---@param result table<string,Range6[][]>
local function add_injection(t, pattern, lang, combined, ranges, parent_ranges, result)
  if #ranges == 0 then
    -- Make sure not to add an empty range set as this is interpreted to mean the whole buffer.
    return
  end

  if not result[lang] then
    result[lang] = {}
  end

  if not combined then
    table.insert(result[lang], clip_regions(ranges, parent_ranges))
    return
  end

  if not t[lang] then
    t[lang] = {}
  end

  -- Key this by pattern. For combined injections, all captures of this pattern
  -- will be parsed by treesitter as the same "source".
  if not t[lang][pattern] then
    -- The same table is stored in both `t` and `result`, so ranges appended through `t` below
    -- are visible in `result` as well.
    local regions = {}
    t[lang][pattern] = regions
    table.insert(result[lang], regions)
  end

  for _, range in ipairs(clip_regions(ranges, parent_ranges)) do
    table.insert(t[lang][pattern], range)
  end
end

-- TODO(clason): replace by refactored `ts.has_parser` API (without side effects)
--- The result of this function is cached to prevent nvim_get_runtime_file from being
--- called too often
--- @param lang string parser name
--- @return boolean # true if parser for {lang} exists on rtp
local has_parser = vim.func._memoize(1, function(lang)
  return vim._ts_has_language(lang)
    or #vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false) > 0
end)

--- Return parser name for language (if exists) or filetype (if registered and exists).
---
---@param alias string language or filetype name
---@return string? # resolved parser name
local function resolve_lang(alias)
  -- validate that `alias` is a legal language
  -- (the whole string must consist of alphanumerics and underscores)
  if not (alias and alias:match('[%w_]+') == alias) then
    return
  end

  if has_parser(alias) then
    return alias
  end

  -- Otherwise treat `alias` as a filetype and look up the language registered for it.
  local lang = vim.treesitter.language.get_lang(alias)
  if lang and has_parser(lang) then
    return lang
  end
end

---@private
--- Extract injections according to:
--- https://tree-sitter.github.io/tree-sitter/3-syntax-highlighting.html#language-injection
---
--- The language is taken from the match metadata (`injection.self`, `injection.parent`,
--- `injection.language`) and may be overridden by an `@injection.language` or
--- `@injection.filename` capture. The ranges are collected from `@injection.content` captures.
---
---@param match table<integer,TSNode[]>
---@param metadata vim.treesitter.query.TSMetadata
---@return string?, boolean, Range6[]
function LanguageTree:_get_injection(match, metadata)
  local ranges = {} ---@type Range6[]
  local combined = metadata['injection.combined'] ~= nil
  local injection_lang = metadata['injection.language'] --[[@as string?]]
  -- `injection.parent` can only be honoured when this tree has a parent. If `self._parent` is
  -- nil, fall through to `injection.language` instead of raising an "attempt to index a nil
  -- value" error.
  local lang = metadata['injection.self'] ~= nil and self:lang()
    or metadata['injection.parent'] ~= nil and self._parent and self._parent:lang()
    or (injection_lang and resolve_lang(injection_lang))
  local include_children = metadata['injection.include-children'] ~= nil

  for id, nodes in pairs(match) do
    for _, node in ipairs(nodes) do
      local name = self._injection_query.captures[id]
      -- Lang should override any other language tag
      if name == 'injection.language' then
        local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
        lang = resolve_lang(text:lower()) -- language names are always lower case
      elseif name == 'injection.filename' then
        -- Derive the language from the filetype detected for the captured file name.
        local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
        local ft = vim.filetype.match({ filename = text })
        lang = ft and resolve_lang(ft)
      elseif name == 'injection.content' then
        for _, range in ipairs(get_node_ranges(node, self._source, metadata[id], include_children)) do
          ranges[#ranges + 1] = range
        end
      end
    end
  end

  return lang, combined, ranges
end

--- Gets language injection regions by language.
---
--- This is where most of the injection processing occurs.
---
--- TODO: Allow for an offset predicate to tailor the injection range
---       instead of using the entire node's range.
--- @private
--- @param range Range|Range[]|true
--- @param thread_state ParserThreadState
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections(range, thread_state)
  -- No injection query, or a query without captures: there is nothing to inject.
  if not self._injection_query or #self._injection_query.captures == 0 then
    self._processed_injection_region = entire_document_range
    return {}
  end

  local start = hrtime()

  ---@type table<string,Range6[][]>
  local found = {}

  -- `true` requests the whole tree; combined injections also force a scan of the whole tree.
  local full_scan = range == true or self._injection_query.has_combined_injections
  if not full_scan and type(range[1]) ~= 'table' then
    -- A single range was passed; wrap it so that a list of ranges is iterated below.
    ---@diagnostic disable-next-line: missing-fields, assign-type-mismatch
    range = { range }
  end
  ---@cast range Range[]

  for tree_index, tree in pairs(self._trees) do
    ---@type vim.treesitter.languagetree.Injection
    local pending = {}
    local root_node = tree:root()
    local parent_ranges = self._regions and self._regions[tree_index] or nil

    local scan_region = range ---@type Range4[]
    if full_scan then
      --- @diagnostic disable-next-line: missing-fields LuaLS varargs bug
      scan_region = { { root_node:range() } }
    end

    for _, span in ipairs(scan_region) do
      local first_row, _, last_row, _ = Range.unpack4(span)
      for pattern, match, metadata in
        self._injection_query:iter_matches(root_node, self._source, first_row, last_row + 1)
      do
        local lang, combined, ranges = self:_get_injection(match, metadata)
        if not lang then
          self:_log('match from injection query failed for pattern', pattern)
        else
          add_injection(pending, pattern, lang, combined, ranges, parent_ranges, found)
        end

        -- Check the current function duration against the timeout, if it exists,
        -- then restart the clock for the next match.
        self:_subtract_time(thread_state, hrtime() - start)
        start = hrtime()
      end
    end
  end

  -- Remember which part of the document has been processed for injections.
  self._processed_injection_region = full_scan and entire_document_range or range

  return found
end

--- Invokes every handler registered for {cb_name}: first the non-recursive ones, then the
--- recursive ones. All extra arguments are forwarded to each handler.
---@private
---@param cb_name TSCallbackName
function LanguageTree:_do_callback(cb_name, ...)
  for _, handler in ipairs(self._callbacks[cb_name]) do
    handler(...)
  end
  for _, handler in ipairs(self._callbacks_rec[cb_name]) do
    handler(...)
  end
end

--- Applies an edit to every tree of this LanguageTree, resets the parser, re-validates the
--- regions and finally forwards the same edit to all children.
---@package
function LanguageTree:_edit(
  start_byte,
  end_byte_old,
  end_byte_new,
  start_row,
  start_col,
  end_row_old,
  end_col_old,
  end_row_new,
  end_col_new
)
  -- The range that was replaced, in Range6 order (rows/cols/bytes of start, then of old end).
  local changed_range = {
    start_row,
    start_col,
    start_byte,
    end_row_old,
    end_col_old,
    end_byte_old,
  }

  for index, tree in pairs(self._trees) do
    self._trees[index] = tree:edit(
      start_byte,
      end_byte_old,
      end_byte_new,
      start_row,
      start_col,
      end_row_old,
      end_col_old,
      end_row_new,
      end_col_new
    )
  end

  self._parser:reset()

  -- Regions are re-read from the edited trees, but only if regions were set in the first place.
  if self._regions then
    local updated = {} ---@type table<integer, Range6[]>
    for index, tree in pairs(self._trees) do
      updated[index] = tree:included_ranges(true)
    end
    self._regions = updated
  end

  -- Validate regions after editing the tree
  self:_iter_regions(function(_, region)
    -- empty region, use the full source
    if #region == 0 then
      return false
    end

    for _, piece in ipairs(region) do
      if Range.intercepts(piece, changed_range) then
        return false
      end
    end

    return true
  end)

  for _, child in pairs(self._children) do
    child:_edit(
      start_byte,
      end_byte_old,
      end_byte_new,
      start_row,
      start_col,
      end_row_old,
      end_col_old,
      end_row_new,
      end_col_new
    )
  end
end

--- Handles a byte-level change: logs it, edits the trees, then emits the `bytes` callbacks.
---@param bufnr integer
---@param changed_tick integer
---@param start_row integer
---@param start_col integer
---@param start_byte integer
---@param old_row integer
---@param old_col integer
---@param old_byte integer
---@param new_row integer
---@param new_col integer
---@param new_byte integer
function LanguageTree:_on_bytes(
  bufnr,
  changed_tick,
  start_row,
  start_col,
  start_byte,
  old_row,
  old_col,
  old_byte,
  new_row,
  new_col,
  new_byte
)
  -- When the old/new text ends on the start row, its end column is offset by the start column.
  local old_end_col = old_col
  if old_row == 0 then
    old_end_col = old_end_col + start_col
  end

  local new_end_col = new_col
  if new_row == 0 then
    new_end_col = new_end_col + start_col
  end

  self:_log(
    'on_bytes',
    bufnr,
    changed_tick,
    start_row,
    start_col,
    start_byte,
    old_row,
    old_col,
    old_byte,
    new_row,
    new_col,
    new_byte
  )

  -- Edit trees together BEFORE emitting a bytes callback.
  self:_edit(
    start_byte,
    start_byte + old_byte,
    start_byte + new_byte,
    start_row,
    start_col,
    start_row + old_row,
    old_end_col,
    start_row + new_row,
    new_end_col
  )

  self:_do_callback(
    'bytes',
    bufnr,
    changed_tick,
    start_row,
    start_col,
    start_byte,
    old_row,
    old_col,
    old_byte,
    new_row,
    new_col,
    new_byte
  )
end

--- Invalidates the tree (with the reload flag set) when the buffer is reloaded.
function LanguageTree:_on_reload()
  self:invalidate(true)
end

--- Invalidates the tree, emits the `detach` callbacks and shuts down logging if it was active.
function LanguageTree:_on_detach(...)
  self:invalidate(true)
  self:_do_callback('detach', ...)

  if not self._logfile then
    return
  end

  self._logger('nvim', 'detaching')
  self._logger = nil
  self._logfile:close()
end

--- Registers callbacks for the [LanguageTree].
---@param cbs table<TSCallbackNameOn,function> An [nvim_buf_attach()]-like table argument with the following handlers:
---           - `on_bytes` : see [nvim_buf_attach()].
---           - `on_changedtree` : a callback that will be called every time the tree has syntactical changes.
---              It will be passed two arguments: a table of the ranges (as node ranges) that
---              changed and the changed tree.
---           - `on_child_added` : emitted when a child is added to the tree.
---           - `on_child_removed` : emitted when a child is removed from the tree.
---           - `on_detach` : emitted when the buffer is detached, see [nvim_buf_detach_event].
---              Takes one argument, the number of the buffer.
--- @param recursive? boolean Apply callbacks recursively for all children. Any new children will
---                           also inherit the callbacks.
function LanguageTree:register_cbs(cbs, recursive)
  if not cbs then
    return
  end

  -- Recursive handlers are kept in a separate table from the non-recursive ones.
  local target = recursive and self._callbacks_rec or self._callbacks

  for on_name, short_name in pairs(TSCallbackNames) do
    if cbs[on_name] then
      table.insert(target[short_name], cbs[on_name])
    end
  end

  if not recursive then
    return
  end

  -- Propagate the same handlers to every existing child.
  for _, child in pairs(self._children) do
    child:register_cbs(cbs, true)
  end
end

--- Checks whether any of the included ranges of {tree} contains {range}.
---@param tree TSTree
---@param range Range
---@return boolean
local function tree_contains(tree, range)
  for _, included in ipairs(tree:included_ranges(false)) do
    if Range.contains(included, range) then
      return true
    end
  end

  return false
end

--- Determines whether {range} is contained in the |LanguageTree|.
---
---@param range Range4
---@return boolean
function LanguageTree:contains(range)
  -- True as soon as one of this LanguageTree's trees contains the range.
  for _, candidate in pairs(self._trees) do
    if tree_contains(candidate, range) then
      return true
    end
  end

  return false
end

--- @class vim.treesitter.LanguageTree.tree_for_range.Opts
--- @inlinedoc
---
--- Ignore injected languages
--- (default: `true`)
--- @field ignore_injections? boolean

--- Gets the tree that contains {range}.
---
---@param range Range4
---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
---@return TSTree?
function LanguageTree:tree_for_range(range, opts)
  opts = opts or {}

  -- Injections are ignored unless `ignore_injections` is explicitly set to `false`.
  if opts.ignore_injections == false then
    -- Children are searched first, so a tree of an injected language wins over our own trees.
    for _, child in pairs(self._children) do
      local found = child:tree_for_range(range, opts)
      if found then
        return found
      end
    end
  end

  for _, candidate in pairs(self._trees) do
    if tree_contains(candidate, range) then
      return candidate
    end
  end

  return nil
end

--- Gets the smallest node that contains {range}.
---
---@param range Range4
---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
---@return TSNode?
function LanguageTree:node_for_range(range, opts)
  local tree = self:tree_for_range(range, opts)
  if not tree then
    return
  end

  return tree:root():descendant_for_range(unpack(range))
end

--- Gets the smallest named node that contains {range}.
---
---@param range Range4
---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
---@return TSNode?
function LanguageTree:named_node_for_range(range, opts)
  local tree = self:tree_for_range(range, opts)
  if not tree then
    return
  end

  return tree:root():named_descendant_for_range(unpack(range))
end

--- Gets the appropriate language that contains {range}.
---
---@param range Range4
---@return vim.treesitter.LanguageTree tree Managing {range}
function LanguageTree:language_for_range(range)
  -- Defaults to this tree; the first child that contains the range resolves it recursively.
  local owner = self

  for _, child in pairs(self._children) do
    if child:contains(range) then
      owner = child:language_for_range(range)
      break
    end
  end

  return owner
end

return LanguageTree