neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

sync.lua (16340B)


      1 -- Notes on incremental sync:
      2 --  Per the protocol, the text range should be:
      3 --
      4 --  A position inside a document (see Position definition below) is expressed as
      5 --  a zero-based line and character offset. The offsets are based on a UTF-16
      6 --  string representation. So a string of the form a𐐀b the character offset
      7 --  of the character a is 0, the character offset of 𐐀 is 1 and the character
      8 --  offset of b is 3 since 𐐀 is represented using two code units in UTF-16.
      9 --
     10 --  To ensure that both client and server split the string into the same line
     11 --  representation the protocol specifies the following end-of-line sequences: ‘\n’, ‘\r\n’ and ‘\r’.
     12 --
     13 --  Positions are line end character agnostic. So you can not specify a position that
     14 --  denotes \r|\n or \n| where | represents the character offset. This means *no* defining
     15 --  a range than ends on the same line after a terminating character
     16 --
     17 -- Generic warnings about byte level changes in neovim. Many apparently "single"
     18 -- operations in on_lines callbacks are actually multiple operations.
     19 --
     20 --  Join operation (2 operations):
     21 --  * extends line 1 with the contents of line 2
     22 --  * deletes line 2
     23 --
     24 --  test 1    test 1 test 2    test 1 test 2
     25 --  test 2 -> test 2        -> test 3
     26 --  test 3    test 3
     27 --
     28 --  Deleting (and undoing) two middle lines (1 operation):
     29 --
     30 --  test 1    test 1
     31 --  test 2 -> test 4
     32 --  test 3
     33 --  test 4
     34 --
     35 --  Deleting partial lines (5 operations) deleting between asterisks below:
     36 --
     37 --  test *1   test *    test *     test *    test *4    test *4*
     38 --  test 2 -> test 2 -> test *4 -> *4     -> *4      ->
     39 --  test 3    test 3
     40 --  test *4   test 4
     41 
     42 local M = {}
     43 
     44 -- local string.byte, unclear if this is necessary for JIT compilation
     45 local str_byte = string.byte
     46 local min = math.min
     47 local str_utfindex = vim.str_utfindex
     48 local str_utf_start = vim.str_utf_start
     49 local str_utf_end = vim.str_utf_end
     50 
     51 -- Given a line, byte idx, alignment, and position_encoding convert to the aligned
     52 -- utf-8 index and either the utf-16, or utf-32 index.
     53 ---@param line string the line to index into
     54 ---@param byte integer the byte idx
     55 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'? (default: utf-8)
     56 ---@return integer byte_idx of first change position
     57 ---@return integer char_idx of first change position
     58 local function align_end_position(line, byte, position_encoding)
     59  position_encoding = position_encoding or 'utf-8'
     60  local char --- @type integer
     61  -- If on the first byte, or an empty string: the trivial case
     62  if byte == 1 or #line == 0 then
     63    char = byte
     64    -- Called in the case of extending an empty line "" -> "a"
     65  elseif byte == #line + 1 then
     66    char = str_utfindex(line, position_encoding) + 1
     67  else
     68    -- Modifying line, find the nearest utf codepoint
     69    local offset = str_utf_start(line, byte)
     70    -- If the byte does not fall on the start of the character, then
     71    -- align to the start of the next character.
     72    if offset < 0 then
     73      byte = byte + str_utf_end(line, byte) + 1
     74    end
     75    if byte <= #line then
     76      --- Convert to 0 based for input, and from 0 based for output
     77      char = str_utfindex(line, position_encoding, byte - 1) + 1
     78    else
     79      char = str_utfindex(line, position_encoding) + 1
     80    end
     81    -- Extending line, find the nearest utf codepoint for the last valid character
     82  end
     83  return byte, char
     84 end
     85 
     86 ---@class vim.lsp.sync.Range
     87 ---@field line_idx integer
     88 ---@field byte_idx integer
     89 ---@field char_idx integer
     90 
     91 --- Finds the first line, byte, and char index of the difference between the previous and current lines buffer normalized to the previous codepoint.
     92 ---@param prev_lines string[] list of lines from previous buffer
     93 ---@param curr_lines string[] list of lines from current buffer
     94 ---@param firstline integer firstline from on_lines, adjusted to 1-index
     95 ---@param lastline integer lastline from on_lines, adjusted to 1-index
     96 ---@param new_lastline integer new_lastline from on_lines, adjusted to 1-index
     97 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'? (default: utf-8)
     98 ---@return vim.lsp.sync.Range result table include line_idx, byte_idx, and char_idx of first change position
     99 local function compute_start_range(
    100  prev_lines,
    101  curr_lines,
    102  firstline,
    103  lastline,
    104  new_lastline,
    105  position_encoding
    106 )
    107  position_encoding = position_encoding or 'utf-8'
    108 
    109  local char_idx --- @type integer?
    110  local byte_idx --- @type integer?
    111  -- If firstline == lastline, no existing text is changed. All edit operations
    112  -- occur on a new line pointed to by lastline. This occurs during insertion of
    113  -- new lines(O), the new newline is inserted at the line indicated by
    114  -- new_lastline.
    115  if firstline == lastline then
    116    local line_idx --- @type integer
    117    local line = prev_lines[firstline - 1]
    118    if line then
    119      line_idx = firstline - 1
    120      byte_idx = #line + 1
    121      char_idx = str_utfindex(line, position_encoding) + 1
    122    else
    123      line_idx = firstline
    124      byte_idx = 1
    125      char_idx = 1
    126    end
    127    return { line_idx = line_idx, byte_idx = byte_idx, char_idx = char_idx }
    128  end
    129 
    130  -- If firstline == new_lastline, the first change occurred on a line that was deleted.
    131  -- In this case, the first byte change is also at the first byte of firstline
    132  if firstline == new_lastline then
    133    return { line_idx = firstline, byte_idx = 1, char_idx = 1 }
    134  end
    135 
    136  local prev_line = assert(prev_lines[firstline])
    137  local curr_line = assert(curr_lines[firstline])
    138 
    139  -- Iterate across previous and current line containing first change
    140  -- to find the first different byte.
    141  -- Note: *about -> a*about will register the second a as the first
    142  -- difference, regardless of edit since we do not receive the first
    143  -- column of the edit from on_lines.
    144  local start_byte_idx = 1
    145  for idx = 1, #prev_line + 1 do
    146    start_byte_idx = idx
    147    if str_byte(prev_line, idx) ~= str_byte(curr_line, idx) then
    148      break
    149    end
    150  end
    151 
    152  -- Convert byte to codepoint if applicable
    153  if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1) then
    154    byte_idx = start_byte_idx
    155    char_idx = 1
    156  elseif start_byte_idx == #prev_line + 1 then
    157    byte_idx = start_byte_idx
    158    char_idx = str_utfindex(prev_line, position_encoding) + 1
    159  else
    160    byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
    161    --- Convert to 0 based for input, and from 0 based for output
    162    char_idx = str_utfindex(prev_line, position_encoding, byte_idx - 1) + 1
    163  end
    164 
    165  -- Return the start difference (shared for new and prev lines)
    166  return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
    167 end
    168 
    169 --- Finds the last line and byte index of the differences between prev and current buffer.
    170 --- Normalized to the next codepoint.
    171 --- prev_end_range is the text range sent to the server representing the changed region.
    172 --- curr_end_range is the text that should be collected and sent to the server.
    173 ---
    174 ---@param prev_lines string[] list of lines
    175 ---@param curr_lines string[] list of lines
    176 ---@param start_range vim.lsp.sync.Range
    177 ---@param firstline integer
    178 ---@param lastline integer
    179 ---@param new_lastline integer
    180 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'
    181 ---@return vim.lsp.sync.Range prev_end_range
    182 ---@return vim.lsp.sync.Range curr_end_range
    183 local function compute_end_range(
    184  prev_lines,
    185  curr_lines,
    186  start_range,
    187  firstline,
    188  lastline,
    189  new_lastline,
    190  position_encoding
    191 )
    192  -- A special case for the following `firstline == new_lastline` case where lines are deleted.
    193  -- Even if the buffer has become empty, nvim behaves as if it has an empty line with eol.
    194  if #curr_lines == 1 and curr_lines[1] == '' then
    195    local prev_line = assert(prev_lines[lastline - 1])
    196    return {
    197      line_idx = lastline - 1,
    198      byte_idx = #prev_line + 1,
    199      char_idx = str_utfindex(prev_line, position_encoding) + 1,
    200    }, { line_idx = 1, byte_idx = 1, char_idx = 1 }
    201  end
    202  -- If firstline == new_lastline, the first change occurred on a line that was deleted.
    203  -- In this case, the last_byte...
    204  if firstline == new_lastline then
    205    return { line_idx = (lastline - new_lastline + firstline), byte_idx = 1, char_idx = 1 }, {
    206      line_idx = firstline,
    207      byte_idx = 1,
    208      char_idx = 1,
    209    }
    210  end
    211  if firstline == lastline then
    212    return { line_idx = firstline, byte_idx = 1, char_idx = 1 }, {
    213      line_idx = new_lastline - lastline + firstline,
    214      byte_idx = 1,
    215      char_idx = 1,
    216    }
    217  end
    218  -- Compare on last line, at minimum will be the start range
    219  local start_line_idx = start_range.line_idx
    220 
    221  -- lastline and new_lastline were last lines that were *not* replaced, compare previous lines
    222  local prev_line_idx = lastline - 1
    223  local curr_line_idx = new_lastline - 1
    224 
    225  local prev_line = assert(prev_lines[lastline - 1])
    226  local curr_line = assert(curr_lines[new_lastline - 1])
    227 
    228  local prev_line_length = #prev_line
    229  local curr_line_length = #curr_line
    230 
    231  local byte_offset = 0
    232 
    233  -- Editing the same line
    234  -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
    235  if prev_line_idx == curr_line_idx then
    236    local max_length --- @type integer
    237    if start_line_idx == prev_line_idx then
    238      -- Search until beginning of difference
    239      max_length = min(
    240        prev_line_length - start_range.byte_idx,
    241        curr_line_length - start_range.byte_idx
    242      ) + 1
    243    else
    244      max_length = min(prev_line_length, curr_line_length) + 1
    245    end
    246    for idx = 0, max_length do
    247      byte_offset = idx
    248      if
    249        str_byte(prev_line, prev_line_length - byte_offset)
    250        ~= str_byte(curr_line, curr_line_length - byte_offset)
    251      then
    252        break
    253      end
    254    end
    255  end
    256 
    257  -- Iterate from end to beginning of shortest line
    258  local prev_end_byte_idx = prev_line_length - byte_offset + 1
    259 
    260  -- Handle case where lines match
    261  if prev_end_byte_idx == 0 then
    262    prev_end_byte_idx = 1
    263  end
    264  local prev_byte_idx, prev_char_idx =
    265    align_end_position(prev_line, prev_end_byte_idx, position_encoding)
    266  local prev_end_range =
    267    { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
    268 
    269  local curr_end_range ---@type vim.lsp.sync.Range
    270  -- Deletion event, new_range cannot be before start
    271  if curr_line_idx < start_line_idx then
    272    curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 }
    273  else
    274    local curr_end_byte_idx = curr_line_length - byte_offset + 1
    275    -- Handle case where lines match
    276    if curr_end_byte_idx == 0 then
    277      curr_end_byte_idx = 1
    278    end
    279    local curr_byte_idx, curr_char_idx =
    280      align_end_position(curr_line, curr_end_byte_idx, position_encoding)
    281    curr_end_range =
    282      { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
    283  end
    284 
    285  return prev_end_range, curr_end_range
    286 end
    287 
    288 --- Get the text of the range defined by start and end line/column
    289 ---@param lines table list of lines
    290 ---@param start_range vim.lsp.sync.Range table returned by first_difference
    291 ---@param end_range vim.lsp.sync.Range new_end_range returned by last_difference
    292 ---@return string text extracted from defined region
    293 local function extract_text(lines, start_range, end_range, line_ending)
    294  if not lines[start_range.line_idx] then
    295    return ''
    296  end
    297  -- Trivial case: start and end range are the same line, directly grab changed text
    298  if start_range.line_idx == end_range.line_idx then
    299    -- string.sub is inclusive, end_range is not
    300    return string.sub(lines[start_range.line_idx], start_range.byte_idx, end_range.byte_idx - 1)
    301  else
    302    -- Handle deletion case
    303    -- Collect the changed portion of the first changed line
    304    local result = { string.sub(lines[start_range.line_idx], start_range.byte_idx) }
    305 
    306    -- Collect the full line for intermediate lines
    307    for idx = start_range.line_idx + 1, end_range.line_idx - 1 do
    308      table.insert(result, lines[idx])
    309    end
    310 
    311    if lines[end_range.line_idx] then
    312      -- Collect the changed portion of the last changed line.
    313      table.insert(result, string.sub(lines[end_range.line_idx], 1, end_range.byte_idx - 1))
    314    else
    315      table.insert(result, '')
    316    end
    317 
    318    -- Add line ending between all lines
    319    return table.concat(result, line_ending)
    320  end
    321 end
    322 
    323 -- rangelength depends on the position encoding
    324 -- bytes for utf-8 (clangd with extension)
    325 -- codepoints for utf-16
    326 -- codeunits for utf-32
    327 -- Line endings count here as 2 chars for \r\n (dos), 1 char for \n (unix), and 1 char for \r (mac)
    328 -- These correspond to Windows, Linux/macOS (OSX and newer), and macOS (version 9 and prior)
    329 ---@param lines string[]
    330 ---@param start_range vim.lsp.sync.Range
    331 ---@param end_range vim.lsp.sync.Range
    332 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'
    333 ---@param line_ending string
    334 ---@return integer
    335 local function compute_range_length(lines, start_range, end_range, position_encoding, line_ending)
    336  local line_ending_length = #line_ending
    337  -- Single line case
    338  if start_range.line_idx == end_range.line_idx then
    339    return end_range.char_idx - start_range.char_idx
    340  end
    341 
    342  local start_line = lines[start_range.line_idx]
    343  local range_length --- @type integer
    344  if start_line and #start_line > 0 then
    345    range_length = str_utfindex(start_line, position_encoding)
    346      - start_range.char_idx
    347      + 1
    348      + line_ending_length
    349  else
    350    -- Length of newline character
    351    range_length = line_ending_length
    352  end
    353 
    354  -- The first and last range of the line idx may be partial lines
    355  for idx = start_range.line_idx + 1, end_range.line_idx - 1 do
    356    -- Length full line plus newline character
    357    if #lines[idx] > 0 then
    358      range_length = range_length
    359        + str_utfindex(assert(lines[idx]), position_encoding)
    360        + #line_ending
    361    else
    362      range_length = range_length + line_ending_length
    363    end
    364  end
    365 
    366  local end_line = lines[end_range.line_idx]
    367  if end_line and #end_line > 0 then
    368    range_length = range_length + end_range.char_idx - 1
    369  end
    370 
    371  return range_length
    372 end
    373 
    374 --- Returns the range table for the difference between prev and curr lines
    375 ---@param prev_lines table list of lines
    376 ---@param curr_lines table list of lines
    377 ---@param firstline integer line to begin search for first difference
    378 ---@param lastline integer line to begin search in old_lines for last difference
    379 ---@param new_lastline integer line to begin search in new_lines for last difference
    380 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32' encoding requested by language server
    381 ---@param line_ending string
    382 ---@return lsp.TextDocumentContentChangeEvent : see https://microsoft.github.io/language-server-protocol/specification/#textDocumentContentChangeEvent
    383 function M.compute_diff(
    384  prev_lines,
    385  curr_lines,
    386  firstline,
    387  lastline,
    388  new_lastline,
    389  position_encoding,
    390  line_ending
    391 )
    392  -- Find the start of changes between the previous and current buffer. Common between both.
    393  -- Sent to the server as the start of the changed range.
    394  -- Used to grab the changed text from the latest buffer.
    395  local start_range = compute_start_range(
    396    prev_lines,
    397    curr_lines,
    398    firstline + 1,
    399    lastline + 1,
    400    new_lastline + 1,
    401    position_encoding
    402  )
    403  -- Find the last position changed in the previous and current buffer.
    404  -- prev_end_range is sent to the server as as the end of the changed range.
    405  -- curr_end_range is used to grab the changed text from the latest buffer.
    406  local prev_end_range, curr_end_range = compute_end_range(
    407    prev_lines,
    408    curr_lines,
    409    start_range,
    410    firstline + 1,
    411    lastline + 1,
    412    new_lastline + 1,
    413    position_encoding
    414  )
    415 
    416  -- Grab the changed text of from start_range to curr_end_range in the current buffer.
    417  -- The text range is "" if entire range is deleted.
    418  local text = extract_text(curr_lines, start_range, curr_end_range, line_ending)
    419 
    420  -- Compute the range of the replaced text. Deprecated but still required for certain language servers
    421  local range_length =
    422    compute_range_length(prev_lines, start_range, prev_end_range, position_encoding, line_ending)
    423 
    424  -- convert to 0 based indexing
    425  local result = {
    426    range = {
    427      ['start'] = { line = start_range.line_idx - 1, character = start_range.char_idx - 1 },
    428      ['end'] = { line = prev_end_range.line_idx - 1, character = prev_end_range.char_idx - 1 },
    429    },
    430    text = text,
    431    rangeLength = range_length,
    432  }
    433 
    434  return result
    435 end
    436 
    437 return M