neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

gen_help_html.lua (44462B)


      1 --- Converts Nvim :help files to HTML.  Validates |tag| links and document syntax (parser errors).
      2 --
      3 -- USAGE (For CI/local testing purposes): Simply `make lintdoc`, which basically does the following:
      4 --   1. :helptags ALL
      5 --   2. nvim -V1 -es +"lua require('src.gen.gen_help_html').run_validate()" +q
      6 --   3. nvim -V1 -es +"lua require('src.gen.gen_help_html').test_gen()" +q
      7 --
      8 -- USAGE (GENERATE HTML):
      9 --   1. `:helptags ALL` first; this script depends on vim.fn.taglist().
     10 --   2. nvim -V1 -es --clean +"lua require('src.gen.gen_help_html').gen('./runtime/doc', 'target/dir/')" +q
     11 --      - Read the docstring at gen().
     12 --   3. cd target/dir/ && jekyll serve --host 0.0.0.0
     13 --   4. Visit http://localhost:4000/…/help.txt.html
     14 --
     15 -- USAGE (VALIDATE):
     16 --   1. nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc')" +q
     17 --      - validate() is 10x faster than gen(), so it is used in CI.
     18 --   2. Check for unreachable URLs:
     19 --      nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc', true)" +q
     20 --
     21 -- SELF-TEST MODE:
     22 --   1. nvim -V1 -es +"lua require('src.gen.gen_help_html')._test()" +q
     23 --
     24 -- NOTES:
     25 --   * This script is used by the automation repo: https://github.com/neovim/doc
     26 --   * :helptags checks for duplicate tags, whereas this script checks _links_ (to tags).
     27 --   * gen() and validate() are the primary (programmatic) entrypoints. validate() only exists
     28 --     because gen() is too slow (~1 min) to run in per-commit CI.
     29 --   * visit_node() is the core function used by gen() to traverse the document tree and produce HTML.
     30 --   * visit_validate() is the core function used by validate().
     31 --   * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout.
     32 --
     33 -- TODO:
     34 --   * Conjoin listitem "blocks" (blank-separated). Example: starting.txt
     35 
     -- Number of URL reachability requests still in flight: validate_url() increments it when it
     -- starts a request, and the request callback decrements it.
     local pending_urls = 0
     -- Map of help tag name => help file. Read by get_tagname() and validate_link().
     -- NOTE(review): presumably filled from _get_helptags() by code outside this section; confirm.
     local tagmap = nil ---@type table<string, string>
     local helpfiles = nil ---@type string[]
     -- Map of invalid |tag| link => basename of the help file where it was found (see validate_link()).
     local invalid_links = {} ---@type table<string, any>
     -- Map of rejected URL => basename of the help file where it was found (see validate_url()).
     local invalid_urls = {} ---@type table<string, any>
     -- Map of flagged word => (help file basename => text of the node containing the word).
     local invalid_spelling = {} ---@type table<string, table<string, string>>
     -- Words that are flagged by the spell check in visit_validate(). Only the keys are consulted
     -- there; the values look like the preferred spelling (presumably used when reporting).
     local spell_dict = {
       Neovim = 'Nvim',
       NeoVim = 'Nvim',
       neovim = 'Nvim',
       lua = 'Lua',
       VimL = 'Vimscript',
       vimL = 'Vimscript',
       viml = 'Vimscript',
       ['tree-sitter'] = 'treesitter',
       ['Tree-sitter'] = 'Treesitter',
     }
     --- specify the list of keywords to ignore (i.e. allow), or true to disable spell check completely.
     --- Keys are help file basenames.
     --- @type table<string, true|string[]>
     local spell_ignore_files = {
       ['credits.txt'] = { 'Neovim' },
       ['news.txt'] = { 'tree-sitter' }, -- in news, may refer to the upstream "tree-sitter" library
       ['news-0.10.txt'] = { 'tree-sitter' },
     }
     --- Punctuation that indicates a word is part of a path, module name, etc.
     --- Example: ".lua" is likely part of a filename, thus we don't want to enforce its spelling.
     --- visit_validate() looks up the text of the previous/next sibling node in this set.
     local spell_punc = {
       ['.'] = true,
       ['/'] = true,
     }
     -- Language of the codeblock currently being rendered: set by visit_node() when it sees a
     -- `language` node, consumed (and reset to nil) when the following `code` node is rendered.
     local language = nil

     -- Module table.
     local M = {}
     69 
     -- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
     -- All other files are "legacy" files which require fixed-width layout.
     -- Used as a set: every value is `true`.
     -- NOTE(review): keys look like help file basenames; the lookup is outside this section, confirm.
     local new_layout = {
       ['api.txt'] = true,
       ['lsp.txt'] = true,
       ['channel.txt'] = true,
       ['deprecated.txt'] = true,
       ['dev.txt'] = true,
       ['dev_arch.txt'] = true,
       ['dev_style.txt'] = true,
       ['dev_test.txt'] = true,
       ['dev_theme.txt'] = true,
       ['dev_tools.txt'] = true,
       ['dev_vimpatch.txt'] = true,
       ['diagnostic.txt'] = true,
       ['help.txt'] = true,
       ['faq.txt'] = true,
       ['gui.txt'] = true,
       ['intro.txt'] = true,
       ['lua.txt'] = true,
       ['lua-guide.txt'] = true,
       ['lua-plugin.txt'] = true,
       ['luaref.txt'] = true,
       ['news.txt'] = true,
       ['news-0.9.txt'] = true,
       ['news-0.10.txt'] = true,
       ['news-0.11.txt'] = true,
       ['news-0.12.txt'] = true,
       ['nvim.txt'] = true,
       ['pack.txt'] = true,
       ['provider.txt'] = true,
       ['tui.txt'] = true,
       ['ui.txt'] = true,
       ['vim_diff.txt'] = true,
     }
    105 
    -- Map of new-page:old-page, to redirect renamed pages.
    -- Key = current help file name, value = the name the page had before it was renamed.
    -- NOTE(review): the code that consumes this table is outside this section; confirm usage there.
    local redirects = {
      ['api-ui-events.txt'] = 'ui.txt',
      ['credits.txt'] = 'backers.txt',
      ['dev.txt'] = 'develop.txt',
      ['dev_tools.txt'] = 'debug.txt',
      ['plugins.txt'] = 'editorconfig.txt',
      ['terminal.txt'] = 'nvim_terminal_emulator.txt',
      ['tui.txt'] = 'term.txt',
    }
    116 
    -- TODO: These known invalid |links| require an update to the relevant docs.
    -- Key = tag name that ignore_invalid() accepts as a false positive (only key presence is
    -- checked there). The values look like the help file that contains the link.
    local exclude_invalid = {
      ["'string'"] = 'vimeval.txt',
      Query = 'treesitter.txt',
      matchit = 'vim_diff.txt',
      ['set!'] = 'treesitter.txt',
    }
    124 
    -- False-positive "invalid URLs".
    -- validate_url() skips any URL that is a key of this table (exact string match). The values
    -- look like the help file that contains the URL; they are not read by validate_url().
    local exclude_invalid_urls = {
      ['http://aspell.net/man-html/Affix-Compression.html'] = 'spell.txt',
      ['http://aspell.net/man-html/Phonetic-Code.html'] = 'spell.txt',
      ['http://lua-users.org/wiki/StringLibraryTutorial'] = 'lua.txt',
      ['http://michael.toren.net/code/'] = 'pi_tar.txt',
      ['http://oldblog.antirez.com/post/redis-and-scripting.html'] = 'faq.txt',
      ['http://papp.plan9.de'] = 'syntax.txt',
      ['http://vimcasts.org'] = 'intro.txt',
      ['http://wiki.services.openoffice.org/wiki/Dictionaries'] = 'spell.txt',
      ['http://www.adapower.com'] = 'ft_ada.txt',
      ['http://www.jclark.com/'] = 'quickfix.txt',
      ['https://cacm.acm.org/research/a-look-at-the-design-of-lua/'] = 'faq.txt', -- blocks GHA?
      ['https://linux.die.net/man/2/poll'] = 'luvref.txt', -- blocks GHA?
    }
    140 
    -- Legacy help files whose problems are deliberately not reported.
    -- Keyed by file basename: ignore_parse_error() ignores every parse error in these files and
    -- validate_url() skips every URL found in them.
    local ignore_errors = {
      ['pi_netrw.txt'] = true,
      ['credits.txt'] = true,
    }
    146 
    --- Writes `text` to the file `fname`, truncating any existing content.
    ---
    --- Raises an error naming the target path (and the reason reported by io.open) if the file
    --- cannot be opened for writing.
    ---
    --- @param fname string Path of the file to write.
    --- @param text string Content to write.
    local function tofile(fname, text)
      local f, err = io.open(fname, 'w')
      if not f then
        -- Report the path: the handle is nil here, so formatting it says nothing useful.
        error(('failed to write: %s (%s)'):format(tostring(fname), tostring(err)))
      end
      f:write(text)
      f:close()
    end
    156 
    --- Replaces the characters & < > { } in `s` with HTML character entities.
    --- Returns nil (instead of raising) when `s` is nil or false.
    ---@type fun(s: string): string
    local function html_esc(s)
      if not s then
        return nil
      end
      local entities = {
        ['&'] = '&amp;',
        ['<'] = '&lt;',
        ['>'] = '&gt;',
        ['{'] = '&#123;',
        ['}'] = '&#125;',
      }
      -- Assign to a local so the substitution count returned by gsub is dropped.
      local escaped = string.gsub(s, '[&<>{}]', entities)
      return escaped
    end
    163 
    --- Percent-encodes `s` for use in a URL, using Vim's substitute().
    --- Every character outside `A-Za-z0-9()'_.~-` is replaced by "%" plus the two-digit (or longer)
    --- uppercase hex value of char2nr() for that character.
    local function url_encode(s)
      -- Credit: tpope / vim-unimpaired
      -- NOTE: these chars intentionally *not* escaped: ' ( )
      local converted = vim.fn.iconv(s, 'latin1', 'utf-8')
      local needs_escape = [=[[^A-Za-z0-9()'_.~-]]=]
      local as_percent_hex = [=[\="%".printf("%02X",char2nr(submatch(0)))]=]
      return vim.fn.substitute(converted, needs_escape, as_percent_hex, 'g')
    end
    174 
    --- Uppercases the first character of every word in `s`.
    ---
    --- Words are separated by runs of spaces/tabs and are re-joined with a single space. The result
    --- has no leading separator (the previous implementation always produced one).
    ---
    --- @param s string
    --- @return string
    local function to_titlecase(s)
      local words = {}
      for w in vim.gsplit(s, '[ \t]+') do
        words[#words + 1] = vim.fn.toupper(w:sub(1, 1)) .. w:sub(2)
      end
      -- Join once at the end instead of re-formatting the accumulated string per word.
      return table.concat(words, ' ')
    end
    182 
    --- Builds an anchor name from heading text: whitespace runs become "-", the result is
    --- lowercased and prefixed with "_". Returns 'unknown' when `text` is nil.
    local function to_heading_tag(text)
      if not text then
        return 'unknown'
      end
      -- Assign to a local so only the substituted string (not gsub's count) is used.
      local dashed = text:gsub('%s+', '-')
      -- Prepend "_" to avoid conflicts with actual :help tags.
      return string.format('_%s', vim.fn.tolower(dashed))
    end
    187 
    --- Gets the basename of path `f` without a trailing ".txt" extension.
    --- Example: "runtime/doc/api.txt" => "api"
    ---
    --- @param f string
    local function basename_noext(f)
      -- Anchor the pattern so only a trailing ".txt" is removed (same pattern as get_helppage()),
      -- and wrap gsub in parens so its substitution count is not passed on as a second argument.
      return vim.fs.basename((f:gsub('%.txt$', '')))
    end
    191 
    --- Returns true if `s` is empty or consists only of spaces and tabs.
    ---
    --- @param s string
    --- @return boolean
    local function is_blank(s)
      -- Use a quoted string: inside [[...]] a "\t" is a literal backslash followed by "t", which
      -- made strings like "t" count as blank and tab-only strings count as non-blank.
      return s:find('^[\t ]*$') ~= nil
    end
    195 
    --- Strips CR, tab, newline and space characters from `s` using Vim's trim().
    --- `dir` is passed through to trim() and defaults to 0 when omitted.
    ---@type fun(s: string, dir?:0|1|2): string
    local function trim(s, dir)
      local where = dir or 0
      return vim.fn.trim(s, '\r\t\n ', where)
    end
    200 
    --- Removes common punctuation from URLs.
    ---
    --- NOTE: this is currently a no-op, since known issues were fixed in the parser:
    --- https://github.com/neovim/tree-sitter-vimdoc/pull/157
    --- The URL is returned unchanged and the list of removed characters is always empty.
    ---
    --- @param url string
    --- @return string, string (fixed_url, removed_chars) where `removed_chars` is in the order found in the input.
    local function fix_url(url)
      -- Disabled logic, kept for reference:
      -- Remove up to one of each char from end of the URL, in this order.
      -- for _, c in ipairs({ '.', ')', ',' }) do
      --   if fixed_url:sub(-1) == c then
      --     removed_chars = c .. removed_chars
      --     fixed_url = fixed_url:sub(1, -2)
      --   end
      -- end
      return url, ''
    end
    220 
    --- Checks if a given line is a "noise" line that doesn't look good in HTML form.
    ---
    --- When `noise_lines` is given and still empty, the line is always treated as noise (the first
    --- line checked against a list is always noise). Every line reported as noise is appended to
    --- `noise_lines`, if given.
    ---
    --- @param line string
    --- @param noise_lines string[]|nil List that collects the noise lines found so far.
    --- @return boolean
    local function is_noise(line, noise_lines)
      local noisy = (
        -- First line is always noise.
        (noise_lines ~= nil and next(noise_lines) == nil)
        or line:find('Type .*gO.* to see the table of contents')
        -- Title line of traditional :help pages.
        -- Example: "NVIM REFERENCE MANUAL    by ..."
        -- Quoted (not [[...]]) so that "\t" really is a tab inside the character class.
        or line:find('^%s*N?VIM[ \t]*REFERENCE[ \t]*MANUAL')
        -- First line of traditional :help pages.
        -- Example: "*api.txt*    Nvim"
        or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
        -- modeline
        -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
        or line:find('^%s*vim?%:.*ft=help')
        or line:find('^%s*vim?%:.*filetype=help')
        or line:find('[*>]local%-additions[*<]')
      )
      if noisy then
        -- table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0))
        table.insert(noise_lines or {}, line)
        return true
      end
      return false
    end
    245 
    --- Builds a "new issue" URL for neovim/tree-sitter-vimdoc with the title and body prefilled.
    --- The body names the generated page (basename of `to_fname` under https://neovim.io/doc/user/)
    --- and quotes `sample_text` (URL-encoded) in a fenced block.
    --- @return string
    local function get_bug_url_vimdoc(fname, to_fname, sample_text)
      local page_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
      local parts = {
        'https://github.com/neovim/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+',
        vim.fs.basename(fname),
        '+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+',
        page_url,
        '%0D%0DContext%3A%0D%0D%60%60%60%0D',
        url_encode(sample_text),
        '%0D%60%60%60',
      }
      return table.concat(parts)
    end
    261 
    --- Builds a "new issue" URL for neovim/neovim with the title and body prefilled.
    --- The body names the generated page (basename of `to_fname` under https://neovim.io/doc/user/),
    --- optionally mentions the unhandled `token_name`, and quotes `sample_text` (URL-encoded) in a
    --- fenced block.
    --- @return string
    local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
      local page_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
      -- Only mention the token when one was given.
      local token_part = ''
      if token_name then
        token_part = '+unhandled+token%3A+%60' .. token_name .. '%60'
      end
      local parts = {
        'https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+',
        vim.fs.basename(fname),
        '+&body=%60gen_help_html.lua%60+problem+at%3A+',
        page_url,
        '%0D',
        token_part,
        '%0DContext%3A%0D%0D%60%60%60%0D',
        url_encode(sample_text),
        '%0D%60%60%60',
      }
      return table.concat(parts)
    end
    279 
    --- Maps a help file name ("foo.txt") to its page name: "foo.html" when `with_extension` is
    --- truthy, otherwise "foo/". Returns nil when `f` is nil.
    ---
    --- Special cases: "index.txt" maps to "vimindex", and "help.txt" maps to "_index.html" / "".
    local function get_helppage(f, with_extension)
      if not f then
        return nil
      end
      local suffix = with_extension and '.html' or '/'
      -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
      if f == 'index.txt' then
        return 'vimindex' .. suffix
      end
      if f == 'help.txt' then
        -- Hugo needs an `_index.html` (note the underscore) file to recognize it as section
        return with_extension and '_index.html' or ''
      end
      local stem = f:gsub('%.txt$', '')
      return stem .. suffix
    end
    295 
    --- Returns the indent size of (possibly multiline) text `s`, counting a tab as 8 columns.
    ---
    --- This is the second value returned by vim.text.indent(); blank lines (empty or
    --- whitespace-only) are ignored.
    local function get_indent(s)
      -- Outer parens keep only the second result of vim.text.indent().
      return (select(2, vim.text.indent(0, s, { expandtab = 8 })))
    end
    303 
    --- Re-indents `s` to indent level 0 (i.e. strips the common indent), after expanding tabs to
    --- 8 spaces. Returns only the re-indented text.
    local function trim_indent(s)
      local dedented = vim.text.indent(0, s, { expandtab = 8 })
      return dedented
    end
    308 
    --- Returns the raw buffer lines spanned by `node`, widened by `offset` lines on both sides,
    --- joined with "\n".
    ---@param node TSNode
    ---@param bufnr integer
    ---@param offset integer
    local function getbuflinestr(node, bufnr, offset)
      local first_row, _, last_row = node:range()
      -- getbufline() takes 1-based line numbers, hence the +1 on each row from node:range().
      local first_lnum = (first_row - offset) + 1
      local last_lnum = (last_row + offset) + 1
      return table.concat(vim.fn.getbufline(bufnr, first_lnum, last_lnum), '\n')
    end
    320 
    --- Returns the run of whitespace that immediately precedes `node` on its first line, read from
    --- the raw buffer text ('' if there is none).
    --- Needed for preformatted `old` lines.
    ---@param node TSNode
    ---@param bufnr integer
    ---@return string
    local function getws(node, bufnr)
      local row, col, end_row = node:range()
      ---@type string
      local first_line = vim.fn.getbufline(bufnr, row + 1, end_row + 1)[1]
      -- Everything on the line before the node's start column; keep only its trailing whitespace.
      return first_line:sub(1, col):match('%s+$') or ''
    end
    334 
    --- Resolves the tag referenced by `node`.
    ---
    --- The tag is the node text, wrapped in single quotes when the node (or its parent) is an
    --- `optionlink`. Returns the help page for that tag per `tagmap` (nil if the tag is unknown)
    --- and the tag itself.
    local function get_tagname(node, bufnr)
      local text = vim.treesitter.get_node_text(node, bufnr)
      local is_option = node:type() == 'optionlink' or node:parent():type() == 'optionlink'
      local tag = text
      if is_option then
        tag = ("'%s'"):format(text)
      end
      local helpfile = vim.fs.basename(tagmap[tag]) or nil -- "api.txt"
      local helppage = get_helppage(helpfile) -- "api/"
      return helppage, tag
    end
    344 
    --- Returns true if the invalid tagname `s` is a known false positive: it is listed in
    --- `exclude_invalid`, or it contains "===" or "---".
    local function ignore_invalid(s)
      if exclude_invalid[s] then
        return true
      end
      -- Strings like |~/====| appear in various places and the parser thinks they are links, but they
      -- are just table borders.
      if s:find('===') then
        return true
      end
      return s:find('%-%-%-') ~= nil
    end
    355 
    --- Decides whether a parse error should be ignored.
    ---
    --- Returns true for files listed in `ignore_errors` (by basename). Otherwise returns the result
    --- of matching `s` against a leading ` ' | or * character: truthy on a match, nil otherwise.
    local function ignore_parse_error(fname, s)
      local base = vim.fs.basename(fname)
      if not ignore_errors[base] then
        -- Ignore parse errors for unclosed tag.
        -- This is common in vimdocs and is treated as plaintext by :help.
        return s:find("^[`'|*]")
      end
      return true
    end
    364 
    --- Returns true if some ancestor of `node` has type `ancestor_name`.
    --- The walk stops (returning false) at the `help_file` node or when there is no parent left,
    --- so `help_file` itself is never reported as an ancestor.
    ---@param node TSNode
    local function has_ancestor(node, ancestor_name)
      local cur = node and node:parent() ---@type TSNode?
      while cur do
        local kind = cur:type()
        if kind == 'help_file' then
          return false
        end
        if kind == ancestor_name then
          return true
        end
        cur = cur:parent()
      end
      return false
    end
    378 
    --- Returns the first named child of `node` whose type is `name`, or nil if there is none.
    ---@param node TSNode
    local function first(node, name)
      for child in node:iter_children() do
        local is_match = child:named() and child:type() == name
        if is_match then
          return child
        end
      end
      return nil
    end
    389 
    --- Gets the kind (node type) of the previous and next siblings of node `n`.
    --- A kind is nil when that sibling does not exist or is not a named node.
    --- @param n any node
    local function get_prev_next(n)
      -- Kind of `sib` if it exists and is a named node, else nil.
      local function named_kind(sib)
        if sib and sib.named and sib:named() then
          return sib:type()
        end
        return nil
      end
      local prev = named_kind(n:prev_sibling())
      local next_ = named_kind(n:next_sibling())
      return prev, next_
    end
    405 
    --- Checks that the tag referenced by link `node` exists in `tagmap`.
    ---
    --- An unknown tag is "ignored" when the link is inside a `column_heading`, the node has a parse
    --- error, or ignore_invalid() accepts it; otherwise it is recorded in `invalid_links` under the
    --- basename of `fname`.
    ---
    --- Returns (helppage, tagname, ignored).
    local function validate_link(node, bufnr, fname)
      local helppage, tagname = get_tagname(node:child(1), bufnr)
      if tagmap[tagname] then
        return helppage, tagname, false
      end
      local ignored = has_ancestor(node, 'column_heading')
        or node:has_error()
        or ignore_invalid(tagname)
      if not ignored then
        invalid_links[tagname] = vim.fs.basename(fname)
      end
      return helppage, tagname, ignored
    end
    417 
    --- Validates the URL `text` found in help file `fname`.
    ---
    --- Returns true (without checking) when the file is in `ignore_errors` or the URL is in
    --- `exclude_invalid_urls`; returns false otherwise.
    ---
    --- With `check_unreachable`, the URL is requested via vim.net.request() and recorded in
    --- `invalid_urls` if the request reports an error; `pending_urls` tracks outstanding requests.
    --- Without it, only URLs containing "http:" are recorded in `invalid_urls`.
    local function validate_url(text, fname, check_unreachable)
      fname = vim.fs.basename(fname)
      if ignore_errors[fname] or exclude_invalid_urls[text] then
        return true
      end

      if not check_unreachable then
        if text:find('http%:') then
          invalid_urls[text] = fname
        end
        return false
      end

      local function on_done(err, _)
        if err then
          invalid_urls[text] = fname
        end
        pending_urls = pending_urls - 1
      end
      vim.net.request(text, { retry = 2 }, on_done)
      pending_urls = pending_urls + 1
      return false
    end
    439 
    --- Recursively walks the tree at `root` (children first) and validates each named node:
    ---   - ERROR nodes are recorded in `stats.parse_errors` (unless ignore_parse_error() accepts them),
    ---   - words are checked against `spell_dict` and recorded in `invalid_spelling`,
    ---   - URLs are checked by validate_url(),
    ---   - |tag| and 'option' links are checked by validate_link().
    ---@param root TSNode
    ---@param level integer
    ---@param lang_tree TSTree
    ---@param opt table
    ---@param stats table
    local function visit_validate(root, level, lang_tree, opt, stats)
      level = level or 0

      local function node_text(node)
        return vim.treesitter.get_node_text(node or root, opt.buf)
      end

      local text = trim(node_text())
      local node_name = (root.named and root:named()) and root:type() or nil
      -- Parent kind (string).
      local parent_node = root:parent()
      local parent = parent_node and parent_node:type() or nil
      local prev_sib, next_sib = root:prev_sibling(), root:next_sibling()
      local prev_text = prev_sib and node_text(prev_sib) or nil
      local next_text = next_sib and node_text(next_sib) or nil

      -- Validate the subtree before this node.
      if root:child_count() > 0 then
        for child in root:iter_children() do
          if child:named() then
            visit_validate(child, level + 1, lang_tree, opt, stats)
          end
        end
      end

      if node_name == 'ERROR' then
        if ignore_parse_error(opt.fname, text) then
          return
        end
        -- Store the raw text to give context to the error report.
        local sample_text = '[top level!]'
        if level >= 1 then
          sample_text = getbuflinestr(root, opt.buf, 0)
        end
        -- Flatten the sample text to a single, truncated line.
        sample_text = vim.trim(sample_text):gsub('[\t\n]', ' '):sub(1, 80)
        table.insert(stats.parse_errors, sample_text)
        return
      end

      if node_name == 'word' or node_name == 'uppercase_name' then
        -- Words inside code spans, links and tags are not spell-checked.
        if vim.tbl_contains({ 'codespan', 'taglink', 'tag' }, parent) then
          return
        end
        local word = vim.fn.trim(text, '.,', 0) -- Ignore some punctuation.
        local fname_basename = assert(vim.fs.basename(opt.fname))
        if not spell_dict[word] then
          return
        end
        local allowed = spell_ignore_files[fname_basename]
        local should_ignore = allowed == true
          or vim.tbl_contains((allowed or {}) --[[ @as string[] ]], word)
          or spell_punc[next_text]
          or spell_punc[prev_text]
        if not should_ignore then
          invalid_spelling[word] = invalid_spelling[word] or {}
          invalid_spelling[word][fname_basename] = node_text(parent_node)
        end
        return
      end

      if node_name == 'url' then
        local fixed_url = fix_url(trim(text))
        validate_url(fixed_url, opt.fname, opt.request_urls)
      elseif node_name == 'taglink' or node_name == 'optionlink' then
        validate_link(root, opt.buf, opt.fname)
      end
    end
    506 
    -- Compensates for tab alignment lost when concealed characters (like |, `, * in tags and code
    -- blocks) are dropped: appends two spaces to `text` when `next_node_text` starts with a tab.
    ---@param text string
    ---@param next_node_text string
    local function fix_tab_after_conceal(text, next_node_text)
      -- Vim tabs take into account the two concealed characters even though they
      -- are invisible, so the two columns are added back to match Vim's alignment.
      local followed_by_tab = string.sub(next_node_text, 1, 1) == '\t'
      if not followed_by_tab then
        return text
      end
      return text .. '  '
    end
    520 
    521 ---@class (exact) nvim.gen_help_html.heading
    522 ---@field name string
    523 ---@field subheadings nvim.gen_help_html.heading[]
    524 ---@field tag string
    525 
    -- Generates HTML from node `root` recursively.
    --
    -- Leaf nodes (and ERROR nodes) are rendered from their HTML-escaped buffer text; other nodes are
    -- rendered from the concatenated HTML of their named children. The result is then wrapped
    -- according to the node type. Side effects: appends to `headings`, `stats.noise_lines`,
    -- `stats.first_tags` and `stats.parse_errors`, updates `opt.indent`, and records invalid links
    -- via validate_link().
    --
    -- Returns the HTML string; for unknown tokens it also returns a second 'unknown-token:"…"' value.
    ---@param root TSNode
    ---@param level integer
    ---@param lang_tree TSTree
    ---@param headings nvim.gen_help_html.heading[]
    ---@param opt table
    ---@param stats table
    local function visit_node(root, level, lang_tree, headings, opt, stats)
      level = level or 0

      -- Gets the buffer text of `node` (default: `root`), prefixed with the whitespace before it
      -- unless `ws_` is false.
      local function node_text(node, ws_)
        node = node or root
        ws_ = (ws_ == nil or ws_ == true) and getws(node, opt.buf) or ''
        return string.format('%s%s', ws_, vim.treesitter.get_node_text(node, opt.buf))
      end

      -- Gets leading whitespace of `node`.
      local function ws(node)
        node = node or root
        local ws_ = getws(node, opt.buf)
        -- XXX: first node of a (line) includes whitespace, even after
        -- https://github.com/neovim/tree-sitter-vimdoc/pull/31 ?
        if ws_ == '' then
          ws_ = vim.treesitter.get_node_text(node, opt.buf):match('^%s+') or ''
        end
        return ws_
      end

      local node_name = (root.named and root:named()) and root:type() or nil
      local prev, next_ = get_prev_next(root)
      -- Parent kind (string).
      local parent = root:parent() and root:parent():type() or nil

      local text = ''
      local trimmed ---@type string
      if root:named_child_count() == 0 or node_name == 'ERROR' then
        text = node_text()
        trimmed = html_esc(trim(text))
        text = html_esc(text)
      else
        -- Process children and join them with whitespace.
        for node, _ in root:iter_children() do
          if node:named() then
            local r = visit_node(node, level + 1, lang_tree, headings, opt, stats)
            text = string.format('%s%s', text, r)
          end
        end
        trimmed = trim(text)
      end

      if node_name == 'help_file' then -- root node
        return text
      elseif node_name == 'url' then
        local fixed_url, removed_chars = fix_url(trimmed)
        return ('%s<a href="%s">%s</a>%s'):format(ws(), fixed_url, fixed_url, removed_chars)
      elseif node_name == 'word' or node_name == 'uppercase_name' then
        return text
      elseif node_name == 'note' then
        return ('<b>%s</b>'):format(text)
      elseif node_name == 'h1' or node_name == 'h2' or node_name == 'h3' then
        if is_noise(text, stats.noise_lines) then
          return '' -- Discard common "noise" lines.
        end
        -- Remove tags from ToC text.
        local heading_node = first(root, 'heading')
        -- The inner parens drop gsub's substitution count; otherwise it would be passed to trim()
        -- as `dir` and (when a tag was removed) only one side of the name would be trimmed.
        local hname = trim((node_text(heading_node):gsub('%*.*%*', '')))
        if not heading_node or hname == '' then
          return '' -- Spurious "===" or "---" in the help doc.
        end

        -- Generate an anchor id from the heading text.
        local tagname = to_heading_tag(hname)
        if node_name == 'h1' or #headings == 0 then
          ---@type nvim.gen_help_html.heading
          local heading = { name = hname, subheadings = {}, tag = tagname }
          headings[#headings + 1] = heading
        else
          table.insert(
            headings[#headings].subheadings,
            { name = hname, subheadings = {}, tag = tagname }
          )
        end
        local el = node_name == 'h1' and 'h2' or 'h3'
        return ('<%s id="%s" class="help-heading">%s</%s>\n'):format(el, tagname, trimmed, el)
      elseif node_name == 'heading' then
        return trimmed
      elseif node_name == 'column_heading' or node_name == 'column_name' then
        if root:has_error() then
          return text
        end
        return ('<div class="help-column_heading">%s</div>'):format(text)
      elseif node_name == 'block' then
        if is_blank(text) then
          return ''
        end
        if opt.old then
          -- XXX: Treat "old" docs as preformatted: they use indentation for layout.
          --      Trim trailing newlines to avoid too much whitespace between divs.
          return ('<div class="old-help-para">%s</div>\n'):format(trim(text, 2))
        end
        return string.format('<div class="help-para">\n%s\n</div>\n', text)
      elseif node_name == 'line' then
        -- Lines inside a code block are kept verbatim; only other lines are filtered.
        -- (This must be `and`: with `or` the parent test is always true.)
        if
          (parent ~= 'codeblock' and parent ~= 'code')
          and (is_blank(text) or is_noise(text, stats.noise_lines))
        then
          return '' -- Discard common "noise" lines.
        end
        -- XXX: Avoid newlines (too much whitespace) after block elements in old (preformatted) layout.
        local div = opt.old
          and root:child(0)
          and vim.list_contains({ 'column_heading', 'h1', 'h2', 'h3' }, root:child(0):type())
        return string.format('%s%s', div and trim(text) or text, div and '' or '\n')
      elseif parent == 'line_li' and node_name == 'prefix' then
        return ''
      elseif node_name == 'line_li' then
        local prefix = first(root, 'prefix')
        local numli = prefix and trim(node_text(prefix)):match('%d') -- Numbered listitem?
        local sib = root:prev_sibling()
        local prev_li = sib and sib:type() == 'line_li'
        local cssclass = numli and 'help-li-num' or 'help-li'

        if not prev_li then
          opt.indent = 1
        else
          local sib_ws = ws(sib)
          local this_ws = ws()
          if get_indent(node_text()) == 0 then
            opt.indent = 1
          elseif this_ws > sib_ws then
            -- Previous sibling is logically the _parent_ if it is indented less.
            opt.indent = opt.indent + 1
          elseif this_ws < sib_ws then
            -- TODO(justinmk): This is buggy. Need to track exact whitespace length for each level.
            opt.indent = math.max(1, opt.indent - 1)
          end
        end
        -- NOTE(review): 1.5 * opt.indent is fractional for odd indent levels but is formatted
        -- with %d; confirm whether the truncation is intended.
        local margin = opt.indent == 1 and '' or ('margin-left: %drem;'):format((1.5 * opt.indent))

        return string.format('<div class="%s" style="%s">%s</div>', cssclass, margin, text)
      elseif node_name == 'taglink' or node_name == 'optionlink' then
        local helppage, tagname, ignored = validate_link(root, opt.buf, opt.fname)
        if ignored or not helppage then
          return html_esc(node_text(root))
        end
        local s = ('%s<a href="/doc/user/%s#%s">%s</a>'):format(
          ws(),
          helppage,
          url_encode(tagname),
          html_esc(tagname)
        )
        if opt.old and node_name == 'taglink' then
          s = fix_tab_after_conceal(s, node_text(root:next_sibling()))
        end
        return s
      elseif vim.list_contains({ 'codespan', 'keycode' }, node_name) then
        if root:has_error() then
          return text
        end
        local s = ('%s<code>%s</code>'):format(ws(), trimmed)
        if opt.old and node_name == 'codespan' then
          s = fix_tab_after_conceal(s, node_text(root:next_sibling()))
        end
        return s
      elseif node_name == 'argument' then
        return ('%s<code>%s</code>'):format(ws(), trim(node_text(root)))
      elseif node_name == 'codeblock' then
        return text
      elseif node_name == 'language' then
        -- Remember the language for the `code` node that follows.
        language = node_text(root)
        return ''
      elseif node_name == 'code' then -- Highlighted codeblock (child).
        if is_blank(text) then
          return ''
        end
        local code ---@type string
        if language then
          code = ('<pre><code class="language-%s">%s</code></pre>'):format(
            language,
            trim(trim_indent(text), 2)
          )
          language = nil
        else
          code = ('<pre>%s</pre>'):format(trim(trim_indent(text), 2))
        end
        return code
      elseif node_name == 'tag' then -- anchor, h4 pseudo-heading
        if root:has_error() then
          return text
        end
        local in_heading = vim.list_contains({ 'h1', 'h2', 'h3' }, parent)
        local h4 = not in_heading and not next_ and get_indent(node_text()) > 8 -- h4 pseudo-heading
        local cssclass = h4 and 'help-tag-right' or 'help-tag'
        local tagname = node_text(root:child(1), false)
        if vim.tbl_count(stats.first_tags) < 2 then
          -- Force the first 2 tags in the doc to be anchored at the main heading.
          table.insert(stats.first_tags, tagname)
          return ''
        end
        local el = 'span'
        local encoded_tagname = url_encode(tagname)
        local s = ('%s<%s id="%s" class="%s"><a href="#%s">%s</a></%s>'):format(
          ws(),
          el,
          encoded_tagname,
          cssclass,
          encoded_tagname,
          trimmed,
          el
        )
        if opt.old then
          s = fix_tab_after_conceal(s, node_text(root:next_sibling()))
        end

        if in_heading and prev ~= 'tag' then
          -- Start the <span> container for tags in a heading.
          -- This makes "justify-content:space-between" right-align the tags.
          --    <h2>foo bar<span>tag1 tag2</span></h2>
          return string.format('<span class="help-heading-tags">%s', s)
        elseif in_heading and next_ == nil then
          -- End the <span> container for tags in a heading.
          return string.format('%s</span>', s)
        end
        return s .. (h4 and '<br>' or '') -- HACK: <br> avoids h4 pseudo-heading mushing with text.
      elseif node_name == 'delimiter' or node_name == 'modeline' then
        return ''
      elseif node_name == 'ERROR' then
        if ignore_parse_error(opt.fname, trimmed) then
          return text
        end

        -- Store the raw text to give context to the bug report.
        local sample_text = level > 0 and getbuflinestr(root, opt.buf, 3) or '[top level!]'
        table.insert(stats.parse_errors, sample_text)
        return ('<a class="parse-error" target="_blank" title="Report bug... (parse error)" href="%s">%s</a>'):format(
          get_bug_url_vimdoc(opt.fname, opt.to_fname, sample_text),
          trimmed
        )
      else -- Unknown token.
        local sample_text = level > 0 and getbuflinestr(root, opt.buf, 3) or '[top level!]'
        return ('<a class="unknown-token" target="_blank" title="Report bug... (unhandled token "%s")" href="%s">%s</a>'):format(
          node_name,
          get_bug_url_nvim(opt.fname, opt.to_fname, sample_text, node_name),
          trimmed
        ),
          ('unknown-token:"%s"'):format(node_name)
      end
    end
    774 
    775 --- @param dir string e.g. '$VIMRUNTIME/doc'
    776 --- @param include string[]|nil
    777 --- @return string[]
    778 local function get_helpfiles(dir, include)
    779  local rv = {}
    780  for f, type in vim.fs.dir(dir) do
    781    if
    782      vim.endswith(f, '.txt')
    783      and type == 'file'
    784      and (not include or vim.list_contains(include, f))
    785    then
    786      local fullpath = vim.fn.fnamemodify(('%s/%s'):format(dir, f), ':p')
    787      table.insert(rv, fullpath)
    788    end
    789  end
    790  return rv
    791 end
    792 
    793 --- Populates the helptags map.
    794 local function _get_helptags(help_dir)
    795  local m = {}
    796  -- Load a random help file to convince taglist() to do its job.
    797  vim.cmd(string.format('split %s/api.txt', help_dir))
    798  vim.cmd('lcd %:p:h')
    799  for _, item in ipairs(vim.fn.taglist('.*')) do
    800    if vim.endswith(item.filename, '.txt') then
    801      m[item.name] = item.filename
    802    end
    803  end
    804  vim.cmd('q!')
    805 
    806  return m
    807 end
    808 
    809 --- Populates the helptags map.
    810 local function get_helptags(help_dir)
    811  local m = _get_helptags(help_dir)
    812 
    813  --- XXX: Append tags from netrw, until we remove it...
    814  local netrwtags = _get_helptags(vim.fs.normalize('$VIMRUNTIME/pack/dist/opt/netrw/doc/'))
    815  m = vim.tbl_extend('keep', m, netrwtags)
    816 
    817  return m
    818 end
    819 
    820 --- Use the vimdoc parser defined in the build, not whatever happens to be installed on the system.
    821 local function ensure_runtimepath()
    822  if not vim.o.runtimepath:find('build/lib/nvim/') then
    823    vim.cmd [[set runtimepath^=./build/lib/nvim/]]
    824  end
    825 end
    826 
    827 --- Opens `fname` (or `text`, if given) in a buffer and gets a treesitter parser for the buffer contents.
    828 ---
    829 --- @param fname string :help file to parse
    830 --- @param text string? :help file contents
    831 --- @return vim.treesitter.LanguageTree, integer (lang_tree, bufnr)
    832 local function parse_buf(fname, text)
    833  local buf ---@type integer
    834 
    835  if text then
    836    vim.cmd('split new') -- Text contents.
    837    vim.api.nvim_put(vim.split(text, '\n'), '', false, false)
    838    vim.cmd('setfiletype help')
    839    buf = vim.api.nvim_get_current_buf()
    840  elseif type(fname) == 'string' then
    841    vim.cmd('split ' .. vim.fn.fnameescape(fname)) -- Filename.
    842    buf = vim.api.nvim_get_current_buf()
    843  else
    844    -- Left for debugging
    845    ---@diagnostic disable-next-line: no-unknown
    846    buf = fname
    847    vim.cmd('sbuffer ' .. tostring(fname)) -- Buffer number.
    848  end
    849  local lang_tree = assert(vim.treesitter.get_parser(buf, nil, { error = false }))
    850  lang_tree:parse()
    851  return lang_tree, buf
    852 end
    853 
    854 --- Validates one :help file `fname`:
    855 ---  - checks that |tag| links point to valid helptags.
    856 ---  - recursively counts parse errors ("ERROR" nodes)
    857 ---
    858 --- @param fname string help file to validate
    859 --- @param request_urls boolean? whether to make requests to the URLs
    860 --- @return { invalid_links: number, parse_errors: string[] }
    861 local function validate_one(fname, request_urls)
    862  local stats = {
    863    parse_errors = {},
    864  }
    865  local lang_tree, buf = parse_buf(fname, nil)
    866  for _, tree in ipairs(lang_tree:trees()) do
    867    visit_validate(tree:root(), 0, tree, {
    868      buf = buf,
    869      fname = fname,
    870      request_urls = request_urls,
    871    }, stats)
    872  end
    873  lang_tree:destroy()
    874  vim.cmd.close()
    875  return stats
    876 end
    877 
    878 --- Generates HTML from one :help file `fname` and writes the result to `to_fname`.
    879 ---
    880 --- @param fname string Source :help file.
    881 --- @param text string|nil Source :help file contents, or nil to read `fname`.
    882 --- @param to_fname string Destination .html file
    883 --- @param old boolean Preformat paragraphs (for old :help files which are full of arbitrary whitespace)
    884 ---
    885 --- @return string html
    886 --- @return table stats
    887 local function gen_one(fname, text, to_fname, old, commit)
    888  local stats = {
    889    noise_lines = {},
    890    parse_errors = {},
    891    first_tags = {}, -- Track the first few tags in doc.
    892  }
    893  local lang_tree, buf = parse_buf(fname, text)
    894  ---@type nvim.gen_help_html.heading[]
    895  local headings = {} -- Headings (for ToC). 2-dimensional: h1 contains h2/h3.
    896  local title = to_titlecase(basename_noext(fname))
    897 
    898  local main = ''
    899  for _, tree in ipairs(lang_tree:trees()) do
    900    main = main
    901      .. (
    902        visit_node(
    903          tree:root(),
    904          0,
    905          tree,
    906          headings,
    907          { buf = buf, old = old, fname = fname, to_fname = to_fname, indent = 1 },
    908          stats
    909        )
    910      )
    911  end
    912 
    913  local frontmatter = vim.json.encode({
    914    title = title,
    915    layout = 'single', -- Hugo-specific, to make _index.html the same as the other pages
    916    aliases = { -- Hugo-specific, make /api.html redirect to /api/
    917      vim.fs.joinpath('/doc/user', vim.fs.basename(to_fname)),
    918    },
    919    params = {
    920      firstTag1 = stats.first_tags[1] or '',
    921      firstTag2 = stats.first_tags[2] or '',
    922      basename = vim.fs.basename(fname),
    923      commit = commit,
    924      parseErrors = #stats.parse_errors,
    925      bugUrl = get_bug_url_nvim(fname, to_fname, 'TODO', nil),
    926      noiseLines = html_esc(table.concat(stats.noise_lines, '\n')),
    927      noiseLinesCount = #stats.noise_lines,
    928      headings = headings,
    929    },
    930  }, { indent = '  ', sort_keys = true })
    931 
    932  local html = ('%s\n%s'):format(frontmatter, main)
    933 
    934  vim.cmd('q!')
    935  lang_tree:destroy()
    936  return html, stats
    937 end
    938 
    939 --- Generates a JSON map of tags to URL-encoded `filename#anchor` locations.
    940 ---
    941 ---@param fname string
    942 local function gen_helptags_json(fname)
    943  assert(tagmap, '`tagmap` not generated yet')
    944  local t = {} ---@type table<string, string>
    945  for tag, f in pairs(tagmap) do
    946    -- "foo.txt"
    947    local helpfile = vim.fs.basename(f)
    948    -- "foo.html"
    949    local htmlpage = assert(get_helppage(helpfile))
    950    -- "foo.html#tag"
    951    t[tag] = ('%s#%s'):format(htmlpage, url_encode(tag))
    952  end
    953  tofile(fname, vim.json.encode(t, { indent = '  ', sort_keys = true }))
    954 end
    955 
    956 local function gen_helptag_html(fname)
    957  local frontmatter = vim.json.encode({
    958    title = 'Helptag redirect',
    959    layout = 'helptag', -- Hugo-specific
    960  }, { indent = '  ', sort_keys = true })
    961  tofile(fname, frontmatter)
    962 end
    963 
    964 -- Testing
    965 
    966 local function ok(cond, expected, actual, message)
    967  assert(
    968    (not expected and not actual) or (expected and actual),
    969    'if "expected" is given, "actual" is also required'
    970  )
    971  if expected then
    972    assert(
    973      cond,
    974      ('%sexpected %s, got: %s'):format(
    975        message and (message .. '\n') or '',
    976        vim.inspect(expected),
    977        vim.inspect(actual)
    978      )
    979    )
    980  else
    981    assert(cond)
    982  end
    983 
    984  return true
    985 end
    986 local function eq(expected, actual, message)
    987  return ok(vim.deep_equal(expected, actual), expected, actual, message)
    988 end
    989 
    990 function M._test()
    991  tagmap = get_helptags('$VIMRUNTIME/doc')
    992  helpfiles = get_helpfiles(vim.fs.normalize('$VIMRUNTIME/doc'))
    993 
    994  ok(vim.tbl_count(tagmap) > 3000, '>3000', vim.tbl_count(tagmap))
    995  ok(
    996    vim.endswith(tagmap['vim.diagnostic.set()'], 'diagnostic.txt'),
    997    tagmap['vim.diagnostic.set()'],
    998    'diagnostic.txt'
    999  )
   1000  ok(vim.endswith(tagmap['%:s'], 'cmdline.txt'), tagmap['%:s'], 'cmdline.txt')
   1001  ok(is_noise([[vim:tw=78:isk=!-~,^*,^\|,^\":ts=8:noet:ft=help:norl:]]))
   1002  ok(is_noise([[          NVIM  REFERENCE  MANUAL     by  Thiago  de  Arruda      ]]))
   1003  ok(not is_noise([[vim:tw=78]]))
   1004 
   1005  eq(0, get_indent('a'))
   1006  eq(1, get_indent(' a'))
   1007  eq(2, get_indent('  a\n  b\n  c\n'))
   1008  eq(5, get_indent('     a\n      \n        b\n      c\n      d\n      e\n'))
   1009  eq(
   1010    'a\n        \n   b\n c\n d\n e\n',
   1011    trim_indent('     a\n             \n        b\n      c\n      d\n      e\n')
   1012  )
   1013 
   1014  local fixed_url, removed_chars = fix_url('https://example.com).')
   1015  eq('https://example.com', fixed_url)
   1016  eq(').', removed_chars)
   1017  fixed_url, removed_chars = fix_url('https://example.com.)')
   1018  eq('https://example.com.', fixed_url)
   1019  eq(')', removed_chars)
   1020  fixed_url, removed_chars = fix_url('https://example.com.')
   1021  eq('https://example.com', fixed_url)
   1022  eq('.', removed_chars)
   1023  fixed_url, removed_chars = fix_url('https://example.com)')
   1024  eq('https://example.com', fixed_url)
   1025  eq(')', removed_chars)
   1026  fixed_url, removed_chars = fix_url('https://example.com')
   1027  eq('https://example.com', fixed_url)
   1028  eq('', removed_chars)
   1029 
   1030  print('all tests passed.\n')
   1031 end
   1032 
   1033 --- @class nvim.gen_help_html.gen_result
   1034 --- @field helpfiles string[] list of generated HTML files, from the source docs {include}
   1035 --- @field err_count integer number of parse errors in :help docs
   1036 --- @field invalid_links table<string, any>
   1037 
   1038 --- Generates HTML from :help docs located in `help_dir` and writes the result in `to_dir`.
   1039 ---
   1040 --- Example:
   1041 ---
   1042 ---   gen('$VIMRUNTIME/doc', '/path/to/neovim.github.io/_site/doc/', {'api.txt', 'autocmd.txt', 'channel.txt'}, nil)
   1043 ---
   1044 --- @param help_dir string Source directory containing the :help files. Must run `make helptags` first.
   1045 --- @param to_dir string Target directory where the .html files will be written.
   1046 --- @param include string[]|nil Process only these filenames. Example: {'api.txt', 'autocmd.txt', 'channel.txt'}
   1047 --- @param commit string?
   1048 --- @param parser_path string? path to non-default vimdoc.so/dylib/dll
   1049 ---
   1050 --- @return nvim.gen_help_html.gen_result result
   1051 function M.gen(help_dir, to_dir, include, commit, parser_path)
   1052  vim.validate('help_dir', help_dir, function(d)
   1053    return vim.fn.isdirectory(vim.fs.normalize(d)) == 1
   1054  end, 'valid directory')
   1055  vim.validate('to_dir', to_dir, 'string')
   1056  vim.validate('include', include, 'table', true)
   1057  vim.validate('commit', commit, 'string', true)
   1058  vim.validate('parser_path', parser_path, function(f)
   1059    return vim.fn.filereadable(vim.fs.normalize(f)) == 1
   1060  end, true, 'valid vimdoc.{so,dll,dylib} filepath')
   1061 
   1062  local err_count = 0
   1063  local redirects_count = 0
   1064  ensure_runtimepath()
   1065 
   1066  parser_path = parser_path and vim.fs.normalize(parser_path) or nil
   1067  if parser_path then
   1068    -- XXX: Delete the installed .so files first, else this won't work :(
   1069    --    /usr/local/lib/nvim/parser/vimdoc.so
   1070    --    ./build/lib/nvim/parser/vimdoc.so
   1071    vim.treesitter.language.add('vimdoc', { path = parser_path })
   1072  end
   1073 
   1074  tagmap = get_helptags(vim.fs.normalize(help_dir))
   1075  helpfiles = get_helpfiles(help_dir, include)
   1076  to_dir = vim.fs.normalize(to_dir)
   1077 
   1078  print(('output dir: %s\n\n'):format(to_dir))
   1079  vim.fn.mkdir(to_dir, 'p')
   1080  -- NOTE: Better for Hugo to be in static/, but works fine with contents/ as to_dir
   1081  gen_helptags_json(('%s/helptags.json'):format(to_dir))
   1082  gen_helptag_html(('%s/helptag.html'):format(to_dir))
   1083 
   1084  for _, f in ipairs(helpfiles) do
   1085    -- "foo.txt"
   1086    local helpfile = vim.fs.basename(f)
   1087    -- "to/dir/foo.html"
   1088    local to_fname = ('%s/%s'):format(to_dir, get_helppage(helpfile, true))
   1089    local html, stats = gen_one(f, nil, to_fname, not new_layout[helpfile], commit or '?')
   1090    tofile(to_fname, html)
   1091    print(
   1092      ('generated (%-2s errors): %-15s => %s'):format(
   1093        #stats.parse_errors,
   1094        helpfile,
   1095        vim.fs.basename(to_fname)
   1096      )
   1097    )
   1098 
   1099    -- Generate redirect pages for renamed help files.
   1100    local helpfile_tag = (helpfile:gsub('%.txt$', '')):gsub('_', '-') -- "dev_tools.txt" => "dev-tools"
   1101    local redirect_from = redirects[helpfile]
   1102    if redirect_from then
   1103      local redirect_text = vim.text
   1104        .indent(
   1105          0,
   1106          [[
   1107          *%s*      Nvim
   1108 
   1109          Document moved to: |%s|
   1110 
   1111          ==============================================================================
   1112          Document moved
   1113 
   1114          Document moved to: |%s|
   1115 
   1116          ==============================================================================
   1117           vim:tw=78:ts=8:ft=help:norl:
   1118          ]]
   1119        )
   1120        :format(redirect_from, helpfile_tag, helpfile_tag, helpfile_tag, helpfile_tag, helpfile_tag)
   1121      local redirect_to = ('%s/%s'):format(to_dir, get_helppage(redirect_from, true))
   1122      local redirect_html, _ =
   1123        gen_one(redirect_from, redirect_text, redirect_to, false, commit or '?')
   1124      assert(
   1125        redirect_html:find(vim.pesc(helpfile_tag)),
   1126        ('not found in redirect html: %s'):format(helpfile_tag)
   1127      )
   1128      tofile(redirect_to, redirect_html)
   1129 
   1130      print(
   1131        ('generated (redirect) : %-15s => %s'):format(
   1132          redirect_from .. '.txt',
   1133          vim.fs.basename(to_fname)
   1134        )
   1135      )
   1136      redirects_count = redirects_count + 1
   1137    end
   1138 
   1139    err_count = err_count + #stats.parse_errors
   1140  end
   1141 
   1142  print(('\ngenerated %d html pages'):format(#helpfiles + redirects_count))
   1143  print(('total errors: %d'):format(err_count))
   1144  -- Why aren't the netrw tags found in neovim/docs/ CI?
   1145  print(('invalid tags: %s'):format(vim.inspect(invalid_links)))
   1146  eq(redirects_count, include and redirects_count or vim.tbl_count(redirects)) -- sanity check
   1147  print(('redirects: %d'):format(redirects_count))
   1148  print('\n')
   1149 
   1150  --- @type nvim.gen_help_html.gen_result
   1151  return {
   1152    helpfiles = helpfiles,
   1153    err_count = err_count,
   1154    invalid_links = invalid_links,
   1155  }
   1156 end
   1157 
   1158 --- @class nvim.gen_help_html.validate_result
   1159 --- @field helpfiles integer number of generated helpfiles
   1160 --- @field err_count integer number of parse errors
   1161 --- @field parse_errors table<string, string[]>
   1162 --- @field invalid_links table<string, any> invalid tags in :help docs
   1163 --- @field invalid_urls table<string, any> invalid URLs in :help docs
   1164 --- @field invalid_spelling table<string, table<string, string>> invalid spelling in :help docs
   1165 
   1166 --- Validates all :help files found in `help_dir`:
   1167 ---  - checks that |tag| links point to valid helptags.
   1168 ---  - recursively counts parse errors ("ERROR" nodes)
   1169 ---
   1170 --- This is 10x faster than gen(), for use in CI.
   1171 ---
   1172 --- @return nvim.gen_help_html.validate_result result
   1173 function M.validate(help_dir, include, parser_path, request_urls)
   1174  vim.validate('help_dir', help_dir, function(d)
   1175    return vim.fn.isdirectory(vim.fs.normalize(d)) == 1
   1176  end, 'valid directory')
   1177  vim.validate('include', include, 'table', true)
   1178  vim.validate('parser_path', parser_path, function(f)
   1179    return vim.fn.filereadable(vim.fs.normalize(f)) == 1
   1180  end, true, 'valid vimdoc.{so,dll,dylib} filepath')
   1181  local err_count = 0 ---@type integer
   1182  local files_to_errors = {} ---@type table<string, string[]>
   1183  ensure_runtimepath()
   1184 
   1185  parser_path = parser_path and vim.fs.normalize(parser_path) or nil
   1186  if parser_path then
   1187    -- XXX: Delete the installed .so files first, else this won't work :(
   1188    --    /usr/local/lib/nvim/parser/vimdoc.so
   1189    --    ./build/lib/nvim/parser/vimdoc.so
   1190    vim.treesitter.language.add('vimdoc', { path = parser_path })
   1191  end
   1192 
   1193  tagmap = get_helptags(vim.fs.normalize(help_dir))
   1194  helpfiles = get_helpfiles(help_dir, include)
   1195 
   1196  for _, f in ipairs(helpfiles) do
   1197    local helpfile = vim.fs.basename(f)
   1198    local rv = validate_one(f, request_urls)
   1199    print(('validated (%-4s errors): %s'):format(#rv.parse_errors, helpfile))
   1200    if #rv.parse_errors > 0 then
   1201      files_to_errors[helpfile] = rv.parse_errors
   1202      vim.print(('%s'):format(vim.iter(rv.parse_errors):fold('', function(s, v)
   1203        return s .. '\n    ' .. v
   1204      end)))
   1205    end
   1206    err_count = err_count + #rv.parse_errors
   1207  end
   1208 
   1209  -- Requests are async, wait for them to finish.
   1210  -- TODO(yochem): `:cancel()` tasks after #36146
   1211  vim.wait(20000, function()
   1212    return pending_urls <= 0
   1213  end)
   1214  ok(pending_urls <= 0, 'pending url checks', pending_urls)
   1215 
   1216  ---@type nvim.gen_help_html.validate_result
   1217  return {
   1218    helpfiles = #helpfiles,
   1219    err_count = err_count,
   1220    parse_errors = files_to_errors,
   1221    invalid_links = invalid_links,
   1222    invalid_urls = invalid_urls,
   1223    invalid_spelling = invalid_spelling,
   1224  }
   1225 end
   1226 
   1227 --- Validates vimdoc files in $VIMRUNTIME, and prints error messages on failure.
   1228 ---
   1229 --- If this fails, try these steps (in order):
   1230 --- 1. Fix/cleanup the :help docs.
   1231 --- 2. Fix the parser: https://github.com/neovim/tree-sitter-vimdoc
   1232 --- 3. File a parser bug, and adjust the tolerance of this test in the meantime.
   1233 ---
   1234 --- @param help_dir? string e.g. '$VIMRUNTIME/doc' or './runtime/doc'
   1235 --- @param request_urls? boolean make network requests to check if the URLs are reachable.
   1236 function M.run_validate(help_dir, request_urls)
   1237  help_dir = vim.fs.normalize(help_dir or '$VIMRUNTIME/doc')
   1238  print('doc path = ' .. vim.uv.fs_realpath(help_dir))
   1239 
   1240  local rv = M.validate(help_dir, nil, nil, request_urls)
   1241 
   1242  -- Check that we actually found helpfiles.
   1243  ok(rv.helpfiles > 100, '>100 :help files', rv.helpfiles)
   1244 
   1245  eq({}, rv.parse_errors, 'no parse errors')
   1246  eq(0, rv.err_count, 'no parse errors')
   1247  eq({}, rv.invalid_links, 'invalid tags in :help docs')
   1248  eq({}, rv.invalid_urls, 'invalid URLs in :help docs')
   1249  eq(
   1250    {},
   1251    rv.invalid_spelling,
   1252    'invalid spelling in :help docs (see spell_dict in src/gen/gen_help_html.lua)'
   1253  )
   1254 end
   1255 
   1256 --- Test-generates HTML from docs.
   1257 ---
   1258 --- 1. Test that gen_help_html.lua actually works.
   1259 --- 2. Test that parse errors did not increase wildly. Because we explicitly test only a few
   1260 ---    :help files, we can be precise about the tolerances here.
   1261 --- @param help_dir? string e.g. '$VIMRUNTIME/doc' or './runtime/doc'
   1262 function M.test_gen(help_dir)
   1263  local tmpdir = vim.fs.dirname(vim.fn.tempname())
   1264  help_dir = vim.fs.normalize(help_dir or '$VIMRUNTIME/doc')
   1265  print('doc path = ' .. vim.uv.fs_realpath(help_dir))
   1266 
   1267  -- Because gen() is slow (~30s), this test is limited to a few files.
   1268  local input = { 'api.txt', 'index.txt', 'nvim.txt' }
   1269  local rv = M.gen(help_dir, tmpdir, input)
   1270  eq(#input, #rv.helpfiles)
   1271  eq(0, rv.err_count, 'parse errors in :help docs')
   1272  eq({}, rv.invalid_links, 'invalid tags in :help docs')
   1273 end
   1274 
   1275 return M