neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

text.lua (7420B)


      1 -- Text processing functions.
      2 
      3 local M = {}
      4 
      5 --- Optional parameters:
      6 --- @class vim.text.diff.Opts
      7 --- @inlinedoc
      8 ---
      9 --- Invoked for each hunk in the diff. Return a negative number
     10 --- to cancel the callback for any remaining hunks.
     11 --- Arguments:
     12 ---   - `start_a` (`integer`): Start line of hunk in {a}.
     13 ---   - `count_a` (`integer`): Hunk size in {a}.
     14 ---   - `start_b` (`integer`): Start line of hunk in {b}.
     15 ---   - `count_b` (`integer`): Hunk size in {b}.
     16 --- @field on_hunk? fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer?
     17 ---
     18 --- Form of the returned diff:
     19 ---   - `unified`: String in unified format.
     20 ---   - `indices`: Array of hunk locations.
     21 --- Note: This option is ignored if `on_hunk` is used.
     22 --- (default: `'unified'`)
     23 --- @field result_type? 'unified'|'indices'
     24 ---
     25 --- Run linematch on the resulting hunks from xdiff. When integer, only hunks
     26 --- up to this size in lines are run through linematch.
     27 --- Requires `result_type = indices`, ignored otherwise.
     28 --- @field linematch? boolean|integer
     29 ---
     30 --- Diff algorithm to use. Values:
     31 ---   - `myers`: the default algorithm
     32 ---   - `minimal`: spend extra time to generate the smallest possible diff
     33 ---   - `patience`: patience diff algorithm
     34 ---   - `histogram`: histogram diff algorithm
     35 --- (default: `'myers'`)
     36 --- @field algorithm? 'myers'|'minimal'|'patience'|'histogram'
     37 --- @field ctxlen? integer Context length
     38 --- @field interhunkctxlen? integer Inter hunk context length
     39 --- @field ignore_whitespace? boolean Ignore whitespace
     40 --- @field ignore_whitespace_change? boolean Ignore whitespace change
     41 --- @field ignore_whitespace_change_at_eol? boolean Ignore whitespace change at end-of-line.
     42 --- @field ignore_cr_at_eol? boolean Ignore carriage return at end-of-line
     43 --- @field ignore_blank_lines? boolean Ignore blank lines
     44 --- @field indent_heuristic? boolean Use the indent heuristic for the internal diff library.
     45 
     46 -- luacheck: no unused args
     47 
     48 --- Run diff on strings {a} and {b}. Any indices returned by this function,
     49 --- either directly or via callback arguments, are 1-based.
     50 ---
     51 --- Examples:
     52 ---
     53 --- ```lua
     54 --- vim.text.diff('a\n', 'b\nc\n')
     55 --- -- =>
     56 --- -- @@ -1 +1,2 @@
     57 --- -- -a
     58 --- -- +b
     59 --- -- +c
     60 ---
     61 --- vim.text.diff('a\n', 'b\nc\n', {result_type = 'indices'})
     62 --- -- =>
     63 --- -- {
     64 --- --   {1, 1, 1, 2}
     65 --- -- }
     66 --- ```
     67 ---
     68 ---@diagnostic disable-next-line: undefined-doc-param
     69 ---@param a string First string to compare
     70 ---@diagnostic disable-next-line: undefined-doc-param
     71 ---@param b string Second string to compare
     72 ---@diagnostic disable-next-line: undefined-doc-param
     73 ---@param opts? vim.text.diff.Opts
     74 ---@return string|integer[][]? # See {opts.result_type}. `nil` if {opts.on_hunk} is given.
     75 function M.diff(...)
     76  ---@diagnostic disable-next-line: deprecated
     77  return vim.diff(...)
     78 end
     79 
     80 local alphabet = '0123456789ABCDEF'
     81 local atoi = {} ---@type table<string, integer>
     82 local itoa = {} ---@type table<integer, string>
     83 do
     84  for i = 1, #alphabet do
     85    local char = alphabet:sub(i, i)
     86    itoa[i - 1] = char
     87    atoi[char] = i - 1
     88    atoi[char:lower()] = i - 1
     89  end
     90 end
     91 
     92 --- Hex encode a string.
     93 ---
     94 --- @param str string String to encode
     95 --- @return string : Hex encoded string
     96 function M.hexencode(str)
     97  local enc = {} ---@type string[]
     98  for i = 1, #str do
     99    local byte = str:byte(i)
    100    enc[2 * i - 1] = itoa[math.floor(byte / 16)]
    101    enc[2 * i] = itoa[byte % 16]
    102  end
    103  return table.concat(enc)
    104 end
    105 
    106 --- Hex decode a string.
    107 ---
    108 --- @param enc string String to decode
    109 --- @return string? : Decoded string
    110 --- @return string? : Error message, if any
    111 function M.hexdecode(enc)
    112  if #enc % 2 ~= 0 then
    113    return nil, 'string must have an even number of hex characters'
    114  end
    115 
    116  local str = {} ---@type string[]
    117  for i = 1, #enc, 2 do
    118    local u = atoi[enc:sub(i, i)]
    119    local l = atoi[enc:sub(i + 1, i + 1)]
    120    if not u or not l then
    121      return nil, 'string must contain only hex characters'
    122    end
    123    str[(i + 1) / 2] = string.char(u * 16 + l)
    124  end
    125  return table.concat(str), nil
    126 end
    127 
    128 --- Sets the indent (i.e. the common leading whitespace) of non-empty lines in `text` to `size`
    129 --- spaces/tabs.
    130 ---
    131 --- Indent is calculated by number of consecutive indent chars.
    132 --- - The first indented, non-empty line decides the indent char (space/tab):
    133 ---   - `SPC SPC TAB …` = two-space indent.
    134 ---   - `TAB SPC …` = one-tab indent.
    135 --- - Set `opts.expandtab` to treat tabs as spaces.
    136 ---
    137 --- To "dedent" (remove the common indent), pass `size=0`:
    138 --- ```lua
    139 --- vim.print(vim.text.indent(0, ' a\n  b\n'))
    140 --- ```
    141 ---
    142 --- To adjust relative-to an existing indent, call indent() twice:
    143 --- ```lua
    144 --- local indented, old_indent = vim.text.indent(0, ' a\n b\n')
    145 --- indented = vim.text.indent(old_indent + 2, indented)
    146 --- vim.print(indented)
    147 --- ```
    148 ---
    149 --- To ignore the final, blank line when calculating the indent, use gsub() before calling indent():
    150 --- ```lua
    151 --- local text = '  a\n  b\n '
    152 --- vim.print(vim.text.indent(0, (text:gsub('\n[\t ]+\n?$', '\n'))))
    153 --- ```
    154 ---
    155 --- @param size integer Number of spaces.
    156 --- @param text string Text to indent.
    157 --- @param opts? { expandtab?: integer }
    158 --- @return string # Indented text.
    159 --- @return integer # Indent size _before_ modification.
    160 function M.indent(size, text, opts)
    161  vim.validate('size', size, 'number')
    162  vim.validate('text', text, 'string')
    163  vim.validate('opts', opts, 'table', true)
    164  -- TODO(justinmk): `opts.prefix`, `predicate` like python https://docs.python.org/3/library/textwrap.html
    165  opts = opts or {}
    166  local tabspaces = opts.expandtab and (' '):rep(opts.expandtab) or nil
    167 
    168  --- Minimum common indent shared by all lines.
    169  local old_indent --- @type integer?
    170  local prefix = tabspaces and ' ' or nil -- Indent char (space or tab).
    171  --- Check all non-empty lines, capturing leading whitespace (if any).
    172  --- @diagnostic disable-next-line: no-unknown
    173  for line_ws, extra in text:gmatch('([\t ]*)([^\n]+)') do
    174    line_ws = tabspaces and line_ws:gsub('[\t]', tabspaces) or line_ws
    175    -- XXX: blank line will miss the last whitespace char in `line_ws`, so we need to check `extra`.
    176    line_ws = line_ws .. (extra:match('^%s+$') or '')
    177    if 0 == #line_ws then
    178      -- Optimization: If any non-empty line has indent=0, there is no common indent.
    179      old_indent = 0
    180      break
    181    end
    182    prefix = prefix and prefix or line_ws:sub(1, 1)
    183    local _, end_ = line_ws:find('^[' .. prefix .. ']+')
    184    old_indent = math.min(old_indent or math.huge, end_ or 0) --[[@as integer?]]
    185  end
    186  -- Default to 0 if all lines are empty.
    187  old_indent = old_indent or 0
    188  prefix = prefix and prefix or ' '
    189 
    190  if old_indent == size then
    191    -- Optimization: if the indent is the same, return the text unchanged.
    192    return text, old_indent
    193  end
    194 
    195  local new_indent = prefix:rep(size)
    196 
    197  --- Replaces indentation of a line.
    198  --- @param line string
    199  local function replace_line(line)
    200    -- Match the existing indent exactly; avoid over-matching any following whitespace.
    201    local pat = prefix:rep(old_indent)
    202    -- Expand tabs before replacing indentation.
    203    line = not tabspaces and line
    204      or line:gsub('^[\t ]+', function(s)
    205        return s:gsub('\t', tabspaces)
    206      end)
    207    -- Text following the indent.
    208    local line_text = line:match('^' .. pat .. '(.*)') or line
    209    return new_indent .. line_text
    210  end
    211 
    212  return (text:gsub('[^\n]+', replace_line)), old_indent
    213 end
    214 
    215 return M