neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

util.lua (10916B)


      1 -- TODO(justinmk): move most of this to `vim.text`.
      2 
-- Shorthand for string.format.
local fmt = string.format

--- A markdown syntax-tree node as a plain Lua table: child nodes live in the
--- array part, named (field) children and metadata live in string keys.
--- @class nvim.util.MDNode
--- @field [integer] nvim.util.MDNode
--- @field type string
--- @field text? string

-- Number of columns added to (or, in the nested-list fudge, subtracted from)
-- `indent` when render_md emits the content of a fenced code block.
local INDENTATION = 4

-- The single byte 0xA0. render_md substitutes it for spaces inside code spans
-- and md_to_vimdoc turns every occurrence back into ' ' after rendering.
-- NOTE(review): this is a raw byte, not the UTF-8 encoding of U+00A0, so the
-- reverse substitution also touches any 0xA0 byte that is part of a
-- multi-byte UTF-8 character in the input; confirm inputs cannot contain one.
local NBSP = string.char(160)

-- Module table returned at the end of the file.
local M = {}
     15 
     16 local function contains(t, xs)
     17  return vim.tbl_contains(xs, t)
     18 end
     19 
-- Map of api_level:version (integer API level -> version string), by
-- inspection of the per-level fixture files, e.g. for level 9:
--    :lua= vim.mpack.decode(vim.fn.readfile('test/functional/fixtures/api_level_9.mpack','B')).version
--- @type table<integer,string>
M.version_level = {
  [14] = '0.12.0',
  [13] = '0.11.0',
  [12] = '0.10.0',
  [11] = '0.9.0',
  [10] = '0.8.0',
  [9] = '0.7.0',
  [8] = '0.6.0',
  [7] = '0.5.0',
  [6] = '0.4.0',
  [5] = '0.3.2',
  [4] = '0.3.0',
  [3] = '0.2.1',
  [2] = '0.2.0',
  [1] = '0.1.0',
}
     38 
     39 --- @param txt string
     40 --- @param srow integer
     41 --- @param scol integer
     42 --- @param erow? integer
     43 --- @param ecol? integer
     44 --- @return string
     45 local function slice_text(txt, srow, scol, erow, ecol)
     46  local lines = vim.split(txt, '\n')
     47 
     48  if srow == erow then
     49    return lines[srow + 1]:sub(scol + 1, ecol)
     50  end
     51 
     52  if erow then
     53    -- Trim the end
     54    for _ = erow + 2, #lines do
     55      table.remove(lines, #lines)
     56    end
     57  end
     58 
     59  -- Trim the start
     60  for _ = 1, srow do
     61    table.remove(lines, 1)
     62  end
     63 
     64  lines[1] = lines[1]:sub(scol + 1)
     65  lines[#lines] = lines[#lines]:sub(1, ecol)
     66 
     67  return table.concat(lines, '\n')
     68 end
     69 
     70 --- @param text string
     71 --- @return nvim.util.MDNode
     72 local function parse_md_inline(text)
     73  local parser = vim.treesitter.languagetree.new(text, 'markdown_inline')
     74  local root = parser:parse(true)[1]:root()
     75 
     76  --- @param node TSNode
     77  --- @return nvim.util.MDNode?
     78  local function extract(node)
     79    local ntype = node:type()
     80 
     81    if ntype:match('^%p$') then
     82      return
     83    end
     84 
     85    --- @type table<any,any>
     86    local ret = { type = ntype }
     87    ret.text = vim.treesitter.get_node_text(node, text)
     88 
     89    local row, col = 0, 0
     90 
     91    for child, child_field in node:iter_children() do
     92      local e = extract(child)
     93      if e and ntype == 'inline' then
     94        local srow, scol = child:start()
     95        if (srow == row and scol > col) or srow > row then
     96          local t = slice_text(ret.text, row, col, srow, scol)
     97          if t and t ~= '' then
     98            table.insert(ret, { type = 'text', j = true, text = t })
     99          end
    100        end
    101        row, col = child:end_()
    102      end
    103 
    104      if child_field then
    105        ret[child_field] = e
    106      else
    107        table.insert(ret, e)
    108      end
    109    end
    110 
    111    if ntype == 'inline' and (row > 0 or col > 0) then
    112      local t = slice_text(ret.text, row, col)
    113      if t and t ~= '' then
    114        table.insert(ret, { type = 'text', text = t })
    115      end
    116    end
    117 
    118    return ret
    119  end
    120 
    121  return extract(root) or {}
    122 end
    123 
    124 --- @param text string
    125 --- @return nvim.util.MDNode
    126 local function parse_md(text)
    127  local parser = vim.treesitter.languagetree.new(text, 'markdown', {
    128    injections = { markdown = '' },
    129  })
    130 
    131  local root = parser:parse(true)[1]:root()
    132 
    133  local EXCLUDE_TEXT_TYPE = {
    134    list = true,
    135    list_item = true,
    136    section = true,
    137    document = true,
    138    fenced_code_block = true,
    139    fenced_code_block_delimiter = true,
    140  }
    141 
    142  --- @param node TSNode
    143  --- @return nvim.util.MDNode?
    144  local function extract(node)
    145    local ntype = node:type()
    146 
    147    if ntype:match('^%p$') or contains(ntype, { 'block_continuation' }) then
    148      return
    149    end
    150 
    151    --- @type table<any,any>
    152    local ret = { type = ntype }
    153 
    154    if not EXCLUDE_TEXT_TYPE[ntype] then
    155      ret.text = vim.treesitter.get_node_text(node, text)
    156    end
    157 
    158    if ntype == 'inline' then
    159      ret = parse_md_inline(ret.text)
    160    end
    161 
    162    for child, child_field in node:iter_children() do
    163      local e = extract(child)
    164      if child_field then
    165        ret[child_field] = e
    166      else
    167        table.insert(ret, e)
    168      end
    169    end
    170 
    171    return ret
    172  end
    173 
    174  return extract(root) or {}
    175 end
    176 
    177 --- Prefixes each line in `text`.
    178 ---
    179 --- Does not wrap, not important for "meta" files? (You probably want md_to_vimdoc instead.)
    180 ---
    181 --- @param text string
    182 --- @param prefix_ string
    183 function M.prefix_lines(prefix_, text)
    184  local r = ''
    185  for _, l in ipairs(vim.split(text, '\n', { plain = true })) do
    186    r = r .. vim.trim(prefix_ .. l) .. '\n'
    187  end
    188  return r
    189 end
    190 
    191 --- @param x string
    192 --- @param start_indent integer
    193 --- @param indent integer
    194 --- @param text_width integer
    195 --- @return string
    196 function M.wrap(x, start_indent, indent, text_width)
    197  local words = vim.split(vim.trim(x), '%s+')
    198  local parts = { string.rep(' ', start_indent) } --- @type string[]
    199  local count = indent
    200 
    201  for i, w in ipairs(words) do
    202    if count > indent and count + #w > text_width - 1 then
    203      parts[#parts + 1] = '\n'
    204      parts[#parts + 1] = string.rep(' ', indent)
    205      count = indent
    206    elseif i ~= 1 then
    207      parts[#parts + 1] = ' '
    208      count = count + 1
    209    end
    210    count = count + #w
    211    parts[#parts + 1] = w
    212  end
    213 
    214  return (table.concat(parts):gsub('%s+\n', '\n'):gsub('\n+$', ''))
    215 end
    216 
--- Renders an MDNode (as produced by parse_md / parse_md_inline) into a list
--- of string fragments; the caller concatenates them to get the final text.
--- Dispatches on `node.type` and recurses into children with `level + 1`.
---
--- @param node nvim.util.MDNode
--- @param start_indent integer Indent passed to M.wrap for the first line.
--- @param indent integer Indent for continuation lines and nested content.
--- @param text_width integer
--- @param level integer Recursion depth; the top-level call passes 0.
--- @param is_list? boolean Not read here; only forwarded to children on the
---   generic fallthrough branch (list_item children are called with `true`).
--- @return string[]
local function render_md(node, start_indent, indent, text_width, level, is_list)
  local parts = {} --- @type string[]

  -- For debugging
  local add_tag = false
  -- local add_tag = true

  local ntype = node.type

  if add_tag then
    parts[#parts + 1] = '<' .. ntype .. '>'
  end

  if ntype == 'text' then
    parts[#parts + 1] = node.text
  elseif ntype == 'html_tag' then
    -- Inline HTML is not supported: fail loudly.
    error('html_tag: ' .. node.text)
  elseif ntype == 'inline_link' then
    -- Emitted as `*text*` (the form align_tags later right-aligns).
    vim.list_extend(parts, { '*', node[1].text, '*' })
  elseif ntype == 'shortcut_link' then
    if node[1].text:find('^<.*>$') then
      -- `<...>` text is emitted verbatim.
      parts[#parts + 1] = node[1].text
    elseif node[1].text:find('^%d+$') then
      -- All-digit text keeps its square brackets.
      vim.list_extend(parts, { '[', node[1].text, ']' })
    else
      -- Anything else is wrapped in `|...|`.
      vim.list_extend(parts, { '|', node[1].text, '|' })
    end
  elseif ntype == 'backslash_escape' then
    parts[#parts + 1] = node.text
  elseif ntype == 'emphasis' then
    -- Drop the first and last character of the node text.
    parts[#parts + 1] = node.text:sub(2, -2)
  elseif ntype == 'code_span' then
    -- Drop the first and last character and replace spaces with NBSP so that
    -- M.wrap (which splits on whitespace) does not break inside the span.
    -- Only the first gsub result is kept since the call is not last in the list.
    vim.list_extend(parts, { '`', node.text:sub(2, -2):gsub(' ', NBSP), '`' })
  elseif ntype == 'inline' then
    if #node == 0 then
      -- No children: wrap the raw text directly.
      local text = assert(node.text)
      parts[#parts + 1] = M.wrap(text, start_indent, indent, text_width)
    else
      for _, child in ipairs(node) do
        vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1))
      end
    end
  elseif ntype == 'paragraph' then
    -- Render all children first, then wrap the paragraph as a whole.
    local pparts = {}
    for _, child in ipairs(node) do
      vim.list_extend(pparts, render_md(child, start_indent, indent, text_width, level + 1))
    end
    parts[#parts + 1] = M.wrap(table.concat(pparts), start_indent, indent, text_width)
    parts[#parts + 1] = '\n'
  elseif ntype == 'code_fence_content' then
    -- Strip the final newline plus trailing whitespace, then split into lines.
    local lines = vim.split(node.text:gsub('\n%s*$', ''), '\n')

    local cindent = indent + INDENTATION
    if level > 3 then
      -- The tree-sitter markdown parser doesn't parse the code blocks indents
      -- correctly in lists. Fudge it!
      lines[1] = '    ' .. lines[1] -- ¯\_(ツ)_/¯
      cindent = indent - level
      -- `initial_indent` is the end index of the leading whitespace of line 1.
      local _, initial_indent = lines[1]:find('^%s*')
      initial_indent = initial_indent + cindent
      if initial_indent < indent then
        cindent = indent - INDENTATION
      end
    end

    -- Non-empty lines are indented by `cindent`; empty lines stay empty.
    for _, l in ipairs(lines) do
      if #l > 0 then
        parts[#parts + 1] = string.rep(' ', cindent)
        parts[#parts + 1] = l
      end
      parts[#parts + 1] = '\n'
    end
  elseif ntype == 'fenced_code_block' then
    -- Opening marker: '>' followed by the first info_string child, if any.
    parts[#parts + 1] = '>'
    for _, child in ipairs(node) do
      if child.type == 'info_string' then
        parts[#parts + 1] = child.text
        break
      end
    end
    parts[#parts + 1] = '\n'
    -- Render every child except info_string, then the closing '<' marker.
    for _, child in ipairs(node) do
      if child.type ~= 'info_string' then
        vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1))
      end
    end
    parts[#parts + 1] = '<\n'
  elseif ntype == 'html_block' then
    -- Strip a leading `<pre>help` and a trailing `</pre>`; the rest is
    -- emitted verbatim.
    local text = node.text:gsub('^<pre>help', '')
    text = text:gsub('</pre>%s*$', '')
    parts[#parts + 1] = text
  elseif ntype == 'list_marker_dot' then
    -- Numbered-list marker: emitted as written.
    parts[#parts + 1] = node.text
  elseif contains(ntype, { 'list_marker_minus', 'list_marker_star' }) then
    -- `-` and `*` bullets are both rendered as '• '.
    parts[#parts + 1] = '• '
  elseif ntype == 'list_item' then
    -- HACK(MariaSolOs): Revert this after the vimdoc parser supports numbered list-items (https://github.com/neovim/tree-sitter-vimdoc/issues/144)
    -- Adds a blank separator when the marker text contains a digit 2-9
    -- followed by a dot.
    if (node[1].text or ''):match('[2-9]%.') then
      parts[#parts + 1] = '\n'
    end
    parts[#parts + 1] = string.rep(' ', indent)
    -- Extra indent for the item's content: 3 for dot markers, otherwise 2.
    local offset = node[1].type == 'list_marker_dot' and 3 or 2
    for i, child in ipairs(node) do
      -- Children 1 and 2 get start_indent 0; later children start at the
      -- content indent.
      local sindent = i <= 2 and 0 or (indent + offset)
      vim.list_extend(
        parts,
        render_md(child, sindent, indent + offset, text_width, level + 1, true)
      )
    end
  else
    -- Generic container. A node that carries text but has no dedicated
    -- branch above cannot be rendered.
    if node.text then
      error(fmt('cannot render:\n%s', vim.inspect(node)))
    end
    for i, child in ipairs(node) do
      -- Only the first child uses `start_indent`; the rest use `indent`.
      local start_indent0 = i == 1 and start_indent or indent
      vim.list_extend(
        parts,
        render_md(child, start_indent0, indent, text_width, level + 1, is_list)
      )
      -- Separate siblings with a newline, except inside a `list` node, after
      -- the last child, or when the next sibling is a `list`.
      if ntype ~= 'list' and i ~= #node then
        if (node[i + 1] or {}).type ~= 'list' then
          parts[#parts + 1] = '\n'
        end
      end
    end
  end

  if add_tag then
    parts[#parts + 1] = '</' .. ntype .. '>'
  end

  return parts
end
    356 
    357 --- @param text_width integer
    358 local function align_tags(text_width)
    359  --- @param line string
    360  --- @return string
    361  return function(line)
    362    local tag_pat = '%s*(%*.+%*)%s*$'
    363    local tags = {}
    364    for m in line:gmatch(tag_pat) do
    365      table.insert(tags, m)
    366    end
    367 
    368    if #tags > 0 then
    369      line = line:gsub(tag_pat, '')
    370      local tags_str = ' ' .. table.concat(tags, ' ')
    371      --- @type integer
    372      local conceal_offset = select(2, tags_str:gsub('%*', '')) - 2
    373      local pad = string.rep(' ', text_width - #line - #tags_str + conceal_offset)
    374      return line .. pad .. tags_str
    375    end
    376 
    377    return line
    378  end
    379 end
    380 
    381 --- @param text string
    382 --- @param start_indent integer
    383 --- @param indent integer
    384 --- @param is_list? boolean
    385 --- @return string
    386 function M.md_to_vimdoc(text, start_indent, indent, text_width, is_list)
    387  -- Add an extra newline so the parser can properly capture ending ```
    388  local parsed = parse_md(text .. '\n')
    389  local ret = render_md(parsed, start_indent, indent, text_width, 0, is_list)
    390 
    391  local lines = vim.split(table.concat(ret):gsub(NBSP, ' '), '\n')
    392 
    393  lines = vim.tbl_map(align_tags(text_width), lines)
    394 
    395  local s = table.concat(lines, '\n')
    396 
    397  -- Reduce whitespace in code-blocks
    398  s = s:gsub('\n+%s*>([a-z]+)\n', ' >%1\n')
    399  s = s:gsub('\n+%s*>\n?\n', ' >\n')
    400 
    401  return s
    402 end
    403 
    404 return M