sync.lua (16340B)
1 -- Notes on incremental sync: 2 -- Per the protocol, the text range should be: 3 -- 4 -- A position inside a document (see Position definition below) is expressed as 5 -- a zero-based line and character offset. The offsets are based on a UTF-16 6 -- string representation. So a string of the form a𐐀b the character offset 7 -- of the character a is 0, the character offset of 𐐀 is 1 and the character 8 -- offset of b is 3 since 𐐀 is represented using two code units in UTF-16. 9 -- 10 -- To ensure that both client and server split the string into the same line 11 -- representation the protocol specifies the following end-of-line sequences: ‘\n’, ‘\r\n’ and ‘\r’. 12 -- 13 -- Positions are line end character agnostic. So you can not specify a position that 14 -- denotes \r|\n or \n| where | represents the character offset. This means *no* defining 15 -- a range than ends on the same line after a terminating character 16 -- 17 -- Generic warnings about byte level changes in neovim. Many apparently "single" 18 -- operations in on_lines callbacks are actually multiple operations. 19 -- 20 -- Join operation (2 operations): 21 -- * extends line 1 with the contents of line 2 22 -- * deletes line 2 23 -- 24 -- test 1 test 1 test 2 test 1 test 2 25 -- test 2 -> test 2 -> test 3 26 -- test 3 test 3 27 -- 28 -- Deleting (and undoing) two middle lines (1 operation): 29 -- 30 -- test 1 test 1 31 -- test 2 -> test 4 32 -- test 3 33 -- test 4 34 -- 35 -- Deleting partial lines (5 operations) deleting between asterisks below: 36 -- 37 -- test *1 test * test * test * test *4 test *4* 38 -- test 2 -> test 2 -> test *4 -> *4 -> *4 -> 39 -- test 3 test 3 40 -- test *4 test 4 41 42 local M = {} 43 44 -- local string.byte, unclear if this is necessary for JIT compilation 45 local str_byte = string.byte 46 local min = math.min 47 local str_utfindex = vim.str_utfindex 48 local str_utf_start = vim.str_utf_start 49 local str_utf_end = vim.str_utf_end 50 51 -- Given a line, byte idx, alignment, and position_encoding convert to the aligned 52 -- utf-8 index and either the utf-16, or utf-32 index. 53 ---@param line string the line to index into 54 ---@param byte integer the byte idx 55 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'? (default: utf-8) 56 ---@return integer byte_idx of first change position 57 ---@return integer char_idx of first change position 58 local function align_end_position(line, byte, position_encoding) 59 position_encoding = position_encoding or 'utf-8' 60 local char --- @type integer 61 -- If on the first byte, or an empty string: the trivial case 62 if byte == 1 or #line == 0 then 63 char = byte 64 -- Called in the case of extending an empty line "" -> "a" 65 elseif byte == #line + 1 then 66 char = str_utfindex(line, position_encoding) + 1 67 else 68 -- Modifying line, find the nearest utf codepoint 69 local offset = str_utf_start(line, byte) 70 -- If the byte does not fall on the start of the character, then 71 -- align to the start of the next character. 72 if offset < 0 then 73 byte = byte + str_utf_end(line, byte) + 1 74 end 75 if byte <= #line then 76 --- Convert to 0 based for input, and from 0 based for output 77 char = str_utfindex(line, position_encoding, byte - 1) + 1 78 else 79 char = str_utfindex(line, position_encoding) + 1 80 end 81 -- Extending line, find the nearest utf codepoint for the last valid character 82 end 83 return byte, char 84 end 85 86 ---@class vim.lsp.sync.Range 87 ---@field line_idx integer 88 ---@field byte_idx integer 89 ---@field char_idx integer 90 91 --- Finds the first line, byte, and char index of the difference between the previous and current lines buffer normalized to the previous codepoint. 92 ---@param prev_lines string[] list of lines from previous buffer 93 ---@param curr_lines string[] list of lines from current buffer 94 ---@param firstline integer firstline from on_lines, adjusted to 1-index 95 ---@param lastline integer lastline from on_lines, adjusted to 1-index 96 ---@param new_lastline integer new_lastline from on_lines, adjusted to 1-index 97 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32'? (default: utf-8) 98 ---@return vim.lsp.sync.Range result table include line_idx, byte_idx, and char_idx of first change position 99 local function compute_start_range( 100 prev_lines, 101 curr_lines, 102 firstline, 103 lastline, 104 new_lastline, 105 position_encoding 106 ) 107 position_encoding = position_encoding or 'utf-8' 108 109 local char_idx --- @type integer? 110 local byte_idx --- @type integer? 111 -- If firstline == lastline, no existing text is changed. All edit operations 112 -- occur on a new line pointed to by lastline. This occurs during insertion of 113 -- new lines(O), the new newline is inserted at the line indicated by 114 -- new_lastline. 115 if firstline == lastline then 116 local line_idx --- @type integer 117 local line = prev_lines[firstline - 1] 118 if line then 119 line_idx = firstline - 1 120 byte_idx = #line + 1 121 char_idx = str_utfindex(line, position_encoding) + 1 122 else 123 line_idx = firstline 124 byte_idx = 1 125 char_idx = 1 126 end 127 return { line_idx = line_idx, byte_idx = byte_idx, char_idx = char_idx } 128 end 129 130 -- If firstline == new_lastline, the first change occurred on a line that was deleted. 131 -- In this case, the first byte change is also at the first byte of firstline 132 if firstline == new_lastline then 133 return { line_idx = firstline, byte_idx = 1, char_idx = 1 } 134 end 135 136 local prev_line = assert(prev_lines[firstline]) 137 local curr_line = assert(curr_lines[firstline]) 138 139 -- Iterate across previous and current line containing first change 140 -- to find the first different byte. 141 -- Note: *about -> a*about will register the second a as the first 142 -- difference, regardless of edit since we do not receive the first 143 -- column of the edit from on_lines. 144 local start_byte_idx = 1 145 for idx = 1, #prev_line + 1 do 146 start_byte_idx = idx 147 if str_byte(prev_line, idx) ~= str_byte(curr_line, idx) then 148 break 149 end 150 end 151 152 -- Convert byte to codepoint if applicable 153 if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1) then 154 byte_idx = start_byte_idx 155 char_idx = 1 156 elseif start_byte_idx == #prev_line + 1 then 157 byte_idx = start_byte_idx 158 char_idx = str_utfindex(prev_line, position_encoding) + 1 159 else 160 byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) 161 --- Convert to 0 based for input, and from 0 based for output 162 char_idx = str_utfindex(prev_line, position_encoding, byte_idx - 1) + 1 163 end 164 165 -- Return the start difference (shared for new and prev lines) 166 return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx } 167 end 168 169 --- Finds the last line and byte index of the differences between prev and current buffer. 170 --- Normalized to the next codepoint. 171 --- prev_end_range is the text range sent to the server representing the changed region. 172 --- curr_end_range is the text that should be collected and sent to the server. 173 --- 174 ---@param prev_lines string[] list of lines 175 ---@param curr_lines string[] list of lines 176 ---@param start_range vim.lsp.sync.Range 177 ---@param firstline integer 178 ---@param lastline integer 179 ---@param new_lastline integer 180 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32' 181 ---@return vim.lsp.sync.Range prev_end_range 182 ---@return vim.lsp.sync.Range curr_end_range 183 local function compute_end_range( 184 prev_lines, 185 curr_lines, 186 start_range, 187 firstline, 188 lastline, 189 new_lastline, 190 position_encoding 191 ) 192 -- A special case for the following `firstline == new_lastline` case where lines are deleted. 193 -- Even if the buffer has become empty, nvim behaves as if it has an empty line with eol. 194 if #curr_lines == 1 and curr_lines[1] == '' then 195 local prev_line = assert(prev_lines[lastline - 1]) 196 return { 197 line_idx = lastline - 1, 198 byte_idx = #prev_line + 1, 199 char_idx = str_utfindex(prev_line, position_encoding) + 1, 200 }, { line_idx = 1, byte_idx = 1, char_idx = 1 } 201 end 202 -- If firstline == new_lastline, the first change occurred on a line that was deleted. 203 -- In this case, the last_byte... 204 if firstline == new_lastline then 205 return { line_idx = (lastline - new_lastline + firstline), byte_idx = 1, char_idx = 1 }, { 206 line_idx = firstline, 207 byte_idx = 1, 208 char_idx = 1, 209 } 210 end 211 if firstline == lastline then 212 return { line_idx = firstline, byte_idx = 1, char_idx = 1 }, { 213 line_idx = new_lastline - lastline + firstline, 214 byte_idx = 1, 215 char_idx = 1, 216 } 217 end 218 -- Compare on last line, at minimum will be the start range 219 local start_line_idx = start_range.line_idx 220 221 -- lastline and new_lastline were last lines that were *not* replaced, compare previous lines 222 local prev_line_idx = lastline - 1 223 local curr_line_idx = new_lastline - 1 224 225 local prev_line = assert(prev_lines[lastline - 1]) 226 local curr_line = assert(curr_lines[new_lastline - 1]) 227 228 local prev_line_length = #prev_line 229 local curr_line_length = #curr_line 230 231 local byte_offset = 0 232 233 -- Editing the same line 234 -- If the byte offset is zero, that means there is a difference on the last byte (not newline) 235 if prev_line_idx == curr_line_idx then 236 local max_length --- @type integer 237 if start_line_idx == prev_line_idx then 238 -- Search until beginning of difference 239 max_length = min( 240 prev_line_length - start_range.byte_idx, 241 curr_line_length - start_range.byte_idx 242 ) + 1 243 else 244 max_length = min(prev_line_length, curr_line_length) + 1 245 end 246 for idx = 0, max_length do 247 byte_offset = idx 248 if 249 str_byte(prev_line, prev_line_length - byte_offset) 250 ~= str_byte(curr_line, curr_line_length - byte_offset) 251 then 252 break 253 end 254 end 255 end 256 257 -- Iterate from end to beginning of shortest line 258 local prev_end_byte_idx = prev_line_length - byte_offset + 1 259 260 -- Handle case where lines match 261 if prev_end_byte_idx == 0 then 262 prev_end_byte_idx = 1 263 end 264 local prev_byte_idx, prev_char_idx = 265 align_end_position(prev_line, prev_end_byte_idx, position_encoding) 266 local prev_end_range = 267 { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx } 268 269 local curr_end_range ---@type vim.lsp.sync.Range 270 -- Deletion event, new_range cannot be before start 271 if curr_line_idx < start_line_idx then 272 curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 } 273 else 274 local curr_end_byte_idx = curr_line_length - byte_offset + 1 275 -- Handle case where lines match 276 if curr_end_byte_idx == 0 then 277 curr_end_byte_idx = 1 278 end 279 local curr_byte_idx, curr_char_idx = 280 align_end_position(curr_line, curr_end_byte_idx, position_encoding) 281 curr_end_range = 282 { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx } 283 end 284 285 return prev_end_range, curr_end_range 286 end 287 288 --- Get the text of the range defined by start and end line/column 289 ---@param lines table list of lines 290 ---@param start_range vim.lsp.sync.Range table returned by first_difference 291 ---@param end_range vim.lsp.sync.Range new_end_range returned by last_difference 292 ---@return string text extracted from defined region 293 local function extract_text(lines, start_range, end_range, line_ending) 294 if not lines[start_range.line_idx] then 295 return '' 296 end 297 -- Trivial case: start and end range are the same line, directly grab changed text 298 if start_range.line_idx == end_range.line_idx then 299 -- string.sub is inclusive, end_range is not 300 return string.sub(lines[start_range.line_idx], start_range.byte_idx, end_range.byte_idx - 1) 301 else 302 -- Handle deletion case 303 -- Collect the changed portion of the first changed line 304 local result = { string.sub(lines[start_range.line_idx], start_range.byte_idx) } 305 306 -- Collect the full line for intermediate lines 307 for idx = start_range.line_idx + 1, end_range.line_idx - 1 do 308 table.insert(result, lines[idx]) 309 end 310 311 if lines[end_range.line_idx] then 312 -- Collect the changed portion of the last changed line. 313 table.insert(result, string.sub(lines[end_range.line_idx], 1, end_range.byte_idx - 1)) 314 else 315 table.insert(result, '') 316 end 317 318 -- Add line ending between all lines 319 return table.concat(result, line_ending) 320 end 321 end 322 323 -- rangelength depends on the position encoding 324 -- bytes for utf-8 (clangd with extension) 325 -- codepoints for utf-16 326 -- codeunits for utf-32 327 -- Line endings count here as 2 chars for \r\n (dos), 1 char for \n (unix), and 1 char for \r (mac) 328 -- These correspond to Windows, Linux/macOS (OSX and newer), and macOS (version 9 and prior) 329 ---@param lines string[] 330 ---@param start_range vim.lsp.sync.Range 331 ---@param end_range vim.lsp.sync.Range 332 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32' 333 ---@param line_ending string 334 ---@return integer 335 local function compute_range_length(lines, start_range, end_range, position_encoding, line_ending) 336 local line_ending_length = #line_ending 337 -- Single line case 338 if start_range.line_idx == end_range.line_idx then 339 return end_range.char_idx - start_range.char_idx 340 end 341 342 local start_line = lines[start_range.line_idx] 343 local range_length --- @type integer 344 if start_line and #start_line > 0 then 345 range_length = str_utfindex(start_line, position_encoding) 346 - start_range.char_idx 347 + 1 348 + line_ending_length 349 else 350 -- Length of newline character 351 range_length = line_ending_length 352 end 353 354 -- The first and last range of the line idx may be partial lines 355 for idx = start_range.line_idx + 1, end_range.line_idx - 1 do 356 -- Length full line plus newline character 357 if #lines[idx] > 0 then 358 range_length = range_length 359 + str_utfindex(assert(lines[idx]), position_encoding) 360 + #line_ending 361 else 362 range_length = range_length + line_ending_length 363 end 364 end 365 366 local end_line = lines[end_range.line_idx] 367 if end_line and #end_line > 0 then 368 range_length = range_length + end_range.char_idx - 1 369 end 370 371 return range_length 372 end 373 374 --- Returns the range table for the difference between prev and curr lines 375 ---@param prev_lines table list of lines 376 ---@param curr_lines table list of lines 377 ---@param firstline integer line to begin search for first difference 378 ---@param lastline integer line to begin search in old_lines for last difference 379 ---@param new_lastline integer line to begin search in new_lines for last difference 380 ---@param position_encoding 'utf-8'|'utf-16'|'utf-32' encoding requested by language server 381 ---@param line_ending string 382 ---@return lsp.TextDocumentContentChangeEvent : see https://microsoft.github.io/language-server-protocol/specification/#textDocumentContentChangeEvent 383 function M.compute_diff( 384 prev_lines, 385 curr_lines, 386 firstline, 387 lastline, 388 new_lastline, 389 position_encoding, 390 line_ending 391 ) 392 -- Find the start of changes between the previous and current buffer. Common between both. 393 -- Sent to the server as the start of the changed range. 394 -- Used to grab the changed text from the latest buffer. 395 local start_range = compute_start_range( 396 prev_lines, 397 curr_lines, 398 firstline + 1, 399 lastline + 1, 400 new_lastline + 1, 401 position_encoding 402 ) 403 -- Find the last position changed in the previous and current buffer. 404 -- prev_end_range is sent to the server as as the end of the changed range. 405 -- curr_end_range is used to grab the changed text from the latest buffer. 406 local prev_end_range, curr_end_range = compute_end_range( 407 prev_lines, 408 curr_lines, 409 start_range, 410 firstline + 1, 411 lastline + 1, 412 new_lastline + 1, 413 position_encoding 414 ) 415 416 -- Grab the changed text of from start_range to curr_end_range in the current buffer. 417 -- The text range is "" if entire range is deleted. 418 local text = extract_text(curr_lines, start_range, curr_end_range, line_ending) 419 420 -- Compute the range of the replaced text. Deprecated but still required for certain language servers 421 local range_length = 422 compute_range_length(prev_lines, start_range, prev_end_range, position_encoding, line_ending) 423 424 -- convert to 0 based indexing 425 local result = { 426 range = { 427 ['start'] = { line = start_range.line_idx - 1, character = start_range.char_idx - 1 }, 428 ['end'] = { line = prev_end_range.line_idx - 1, character = prev_end_range.char_idx - 1 }, 429 }, 430 text = text, 431 rangeLength = range_length, 432 } 433 434 return result 435 end 436 437 return M