neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

commit 654303079baa9641a6fb50787e7f525ed64e6f40
parent f0294418d65d005d885eabf80eecc7255358b95e
Author: tao <2471314@gmail.com>
Date:   Mon, 17 Nov 2025 09:23:52 +0800

feat(lsp): skip invalid header lines #36402

Problem:
Some servers write log to stdout and there's no way to avoid it.
See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828

Solution:
We can extract `content-length` field byte by byte and skip invalid
lines via a simple state machine (name/colon/value/invalid), with minimal
performance impact.

I chose byte parsing here instead of pattern. Although it's a bit more complex,
it provides more stable performance and allows for more accurate error info when
needed.

Here is a bench result and script:

    parse header1 by pattern: 59.52377ms 45
    parse header1 by byte: 7.531128ms 45

    parse header2 by pattern: 26.06936ms 45
    parse header2 by byte: 5.235724ms 45

    parse header3 by pattern: 9.348495ms 45
    parse header3 by byte: 3.452389ms 45

    parse header4 by pattern: 9.73156ms 45
    parse header4 by byte: 3.638386ms 45

Script:

```lua
local strbuffer = require('string.buffer')

--- @param header string
local function get_content_length(header)
  for line in header:gmatch('(.-)\r?\n') do
    if line == '' then
      break
    end
    local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
    if key and key:lower() == 'content-length' then
      return assert(tonumber(value))
    end
  end
  error('Content-Length not found in header: ' .. header)
end

--- @param header string
local function get_content_length_by_byte(header)
  local state = 'name'
  local i, len = 1, #header
  local j, name = 1, 'content-length'
  local buf = strbuffer.new()
  local digit = true
  while i <= len do
    local c = header:byte(i)
    if state == 'name' then
      if c >= 65 and c <= 90 then -- lower case
        c = c + 32
      end
      if (c == 32 or c == 9) and j == 1 then
        -- skip OWS for compatibility only
      elseif c == name:byte(j) then
        j = j + 1
      elseif c == 58 and j == 15 then
        state = 'colon'
      else
        state = 'invalid'
      end
    elseif state == 'colon' then
      if c ~= 32 and c ~= 9 then -- skip OWS normally
        state = 'value'
        i = i - 1
      end
    elseif state == 'value' then
      if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
        local value = buf:get()
        return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value)
      else
        buf:put(string.char(c))
      end
      if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
        digit = false
      end
    elseif state == 'invalid' then
      if c == 10 then -- reset for next line
        state, j = 'name', 1
      end
    end
    i = i + 1
  end
  error('Content-Length not found in header: ' .. header)
end

--- @param fn fun(header: string): number
local function bench(label, header, fn, count)
  local start = vim.uv.hrtime()
  local value --- @type number
  for _ = 1, count do
    value = fn(header)
  end
  local elapsed = (vim.uv.hrtime() - start) / 1e6
  print(label .. ':', elapsed .. 'ms', value)
end

-- header starting with log lines
local header1 =
  'WARN: no common words file defined for Khmer - this language might not be correctly auto-detected\nWARN: no common words file defined for Japanese - this language might not be correctly auto-detected\nContent-Length: 45  \r\n\r\n'
-- header starting with content-type
local header2 = 'Content-Type: application/json-rpc; charset=utf-8\r\nContent-Length: 45  \r\n'
-- regular header
local header3 = '  Content-Length: 45\r\n'
-- regular header ending with content-type
local header4 = '  Content-Length: 45 \r\nContent-Type: application/json-rpc; charset=utf-8\r\n'

local count = 10000

collectgarbage('collect')
bench('parse header1 by pattern', header1, get_content_length, count)
collectgarbage('collect')
bench('parse header1 by byte', header1, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header2 by pattern', header2, get_content_length, count)
collectgarbage('collect')
bench('parse header2 by byte', header2, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header3 by pattern', header3, get_content_length, count)
collectgarbage('collect')
bench('parse header3 by byte', header3, get_content_length_by_byte, count)

collectgarbage('collect')
bench('parse header4 by pattern', header4, get_content_length, count)
collectgarbage('collect')
bench('parse header4 by byte', header4, get_content_length_by_byte, count)
```

Also, I removed an outdated test
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L1950
and tweaked the boilerplate in two other tests for reusability while keeping the final assertions the same.
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L5704
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L5721
Diffstat:
Mruntime/lua/vim/_stringbuffer.lua | 3++-
Mruntime/lua/vim/lsp/rpc.lua | 59+++++++++++++++++++++++++++++++++++++++++++++++++----------
Mtest/functional/fixtures/fake-lsp-server.lua | 4----
Mtest/functional/plugin/lsp/testutil.lua | 42+++++++++++++++++++++++++++++-------------
Mtest/functional/plugin/lsp_spec.lua | 144++++++++++++++++++++++++++++++++++++++-----------------------------------------
5 files changed, 149 insertions(+), 103 deletions(-)

diff --git a/runtime/lua/vim/_stringbuffer.lua b/runtime/lua/vim/_stringbuffer.lua @@ -77,9 +77,10 @@ function StrBuffer:set(str) return self:reset():put(str) end ---- @param n integer +--- @param n? integer --- @return string function StrBuffer:get(n) + n = n or self.len local r = self:_peak(n) self:skip(n) return r diff --git a/runtime/lua/vim/lsp/rpc.lua b/runtime/lua/vim/lsp/rpc.lua @@ -1,6 +1,7 @@ local log = require('vim.lsp.log') local protocol = require('vim.lsp.protocol') local lsp_transport = require('vim.lsp._transport') +local strbuffer = require('vim._stringbuffer') local validate, schedule_wrap = vim.validate, vim.schedule_wrap --- Embeds the given string into a table and correctly computes `Content-Length`. @@ -16,19 +17,59 @@ local function format_message_with_content_length(message) }) end ---- Extract content-length from the header +--- Extract content-length from the header. +--- +--- The structure of header fields conforms to the [HTTP semantic](https://tools.ietf.org/html/rfc7230#section-3.2). +--- i.e., `header-field = field-name : OWS field-value OWS`, +--- OWS means optional whitespace (Space/Horizontal Tab). --- +--- we ignore lines ending with `\n` that don't contain `content-length`, since some servers +--- write log to stdout and there's no way to avoid it. +--- See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828 --- @param header string The header to parse --- @return integer local function get_content_length(header) - for line in header:gmatch('(.-)\r\n') do - if line == '' then - break - end - local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$') - if key and key:lower() == 'content-length' then - return assert(tonumber(value)) + local state = 'name' + local i, len = 1, #header + local j, name = 1, 'content-length' + local buf = strbuffer.new() + local digit = true + while i <= len do + local c = header:byte(i) + if state == 'name' then + if c >= 65 and c <= 90 then -- lower case + c = c + 32 + end + if (c == 32 or c == 9) and j == 1 then -- luacheck: ignore 542 + -- skip OWS for compatibility only + elseif c == name:byte(j) then + j = j + 1 + elseif c == 58 and j == 15 then + state = 'colon' + else + state = 'invalid' + end + elseif state == 'colon' then + if c ~= 32 and c ~= 9 then -- skip OWS normally + state = 'value' + i = i - 1 + end + elseif state == 'value' then + if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n + local value = buf:get() + return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value) + else + buf:put(string.char(c)) + end + if c < 48 and c ~= 32 and c ~= 9 or c > 57 then + digit = false + end + elseif state == 'invalid' then + if c == 10 then -- reset for next line + state, j = 'name', 1 + end end + i = i + 1 end error('Content-Length not found in header: ' .. header) end @@ -149,8 +190,6 @@ local default_dispatchers = { end, } -local strbuffer = require('vim._stringbuffer') - --- @async local function request_parser_loop() local buf = strbuffer.new() diff --git a/test/functional/fixtures/fake-lsp-server.lua b/test/functional/fixtures/fake-lsp-server.lua @@ -755,10 +755,6 @@ function tests.basic_check_buffer_open_and_change_incremental_editing() } end -function tests.invalid_header() - io.stdout:write('Content-length: \r\n') -end - function tests.decode_nil() skeleton { on_init = function(_) diff --git a/test/functional/plugin/lsp/testutil.lua b/test/functional/plugin/lsp/testutil.lua @@ -24,27 +24,43 @@ end M.create_tcp_echo_server = function() --- Create a TCP server that echos the first message it receives. --- @param host string - ---@return uv.uv_tcp_t - ---@return integer - ---@return fun():string|nil + --- @return integer function _G._create_tcp_server(host) local uv = vim.uv local server = assert(uv.new_tcp()) - local init = nil + local on_read = require('vim.lsp.rpc').create_read_loop( + function(body) + vim.rpcnotify(1, 'body', body) + end, + nil, + function(err, code) + vim.rpcnotify(1, 'error', err, code) + end + ) server:bind(host, 0) - server:listen(127, function(err) - assert(not err, err) + server:listen(127, function(e) + assert(not e, e) local socket = assert(uv.new_tcp()) server:accept(socket) - socket:read_start(require('vim.lsp.rpc').create_read_loop(function(body) - init = body + socket:read_start(function(err, chunk) + on_read(err, chunk) + socket:shutdown() socket:close() - end)) + server:shutdown() + server:close() + end) + end) + return server:getsockname().port + end + function _G._send_msg_to_server(msg) + local port = _G._create_tcp_server('127.0.0.1') + local client = assert(vim.uv.new_tcp()) + client:connect('127.0.0.1', port, function() + client:write(msg, function() + client:shutdown() + client:close() + end) end) - local port = server:getsockname().port - return server, port, function() - return init - end end end diff --git a/test/functional/plugin/lsp_spec.lua b/test/functional/plugin/lsp_spec.lua @@ -65,6 +65,17 @@ local function apply_text_edits(edits, encoding) end) end +--- @param notification_cb fun(method: 'body' | 'error', args: any) +local function verify_single_notification(notification_cb) + local called = false + n.run(nil, function(method, args) + notification_cb(method, args) + stop() + called = true + end, nil, 1000) + eq(true, called) +end + -- TODO(justinmk): hangs on Windows https://github.com/neovim/neovim/pull/11837 if skip(is_os('win')) then return @@ -1921,65 +1932,58 @@ describe('LSP', function() end) describe('parsing tests', function() - it('should handle invalid content-length correctly', function() - local expected_handlers = { - { NIL, {}, { method = 'shutdown', client_id = 1 } }, - { NIL, {}, { method = 'finish', client_id = 1 } }, - { NIL, {}, { method = 'start', client_id = 1 } }, - } - local client --- @type vim.lsp.Client - test_rpc_server { - test_name = 'invalid_header', - on_setup = function() end, - on_init = function(_client) - client = _client - client:stop(true) - end, - on_exit = function(code, signal) - eq(0, code, 'exit code') - eq(0, signal, 'exit signal') - end, - on_handler = function(err, result, ctx) - eq(table.remove(expected_handlers), { err, result, ctx }, 'expected handler') - end, - } + local body = '{"jsonrpc":"2.0","id": 1,"method":"demo"}' + + before_each(function() + exec_lua(create_tcp_echo_server) end) it('should catch error while parsing invalid header', function() - local header = 'Content-Length: \r\n' - local called = false + -- No whitespace is allowed between the header field-name and colon. + -- See https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.4 + local field = 'Content-Length : 10 \r\n' exec_lua(function() - local server = assert(vim.uv.new_tcp()) - server:bind('127.0.0.1', 0) - server:listen(1, function(e) - assert(not e, e) - local socket = assert(vim.uv.new_tcp()) - server:accept(socket) - socket:write(header .. '\r\n', function() - socket:shutdown() - server:close() - end) - end) - local client = assert(vim.uv.new_tcp()) - local on_read = require('vim.lsp.rpc').create_read_loop(function() end, function() - client:close() - end, function(err, code) - vim.rpcnotify(1, 'error', err, code) - end) - client:connect('127.0.0.1', server:getsockname().port, function() - client:read_start(on_read) - end) + _G._send_msg_to_server(field .. '\r\n') end) - n.run(nil, function(method, args) - local err, code = unpack(args) --- @type string, number + verify_single_notification(function(method, args) ---@param args [string, number] eq('error', method) - eq(1, code) - matches(vim.pesc('Content-Length not found in header: ' .. header) .. '$', err) - called = true - stop() - return NIL - end, nil, 1000) - eq(true, called) + eq(1, args[2]) + matches(vim.pesc('Content-Length not found in header: ' .. field) .. '$', args[1]) + end) + end) + + it('value of Content-Length shoud be number', function() + local value = '123 foo' + exec_lua(function() + _G._send_msg_to_server('Content-Length: ' .. value .. '\r\n\r\n') + end) + verify_single_notification(function(method, args) ---@param args [string, number] + eq('error', method) + eq(1, args[2]) + matches('value of Content%-Length is not number: ' .. value .. '$', args[1]) + end) + end) + + it('field name is case-insensitive', function() + exec_lua(function() + _G._send_msg_to_server('CONTENT-Length: ' .. #body .. ' \r\n\r\n' .. body) + end) + verify_single_notification(function(method, args) ---@param args [string] + eq('body', method) + eq(body, args[1]) + end) + end) + + it("ignore some lines ending with LF that don't contain content-length", function() + exec_lua(function() + _G._send_msg_to_server( + 'foo \n bar\nWARN: no common words.\nContent-Length: ' .. #body .. ' \r\n\r\n' .. body + ) + end) + verify_single_notification(function(method, args) ---@param args [string] + eq('body', method) + eq(body, args[1]) + end) end) it('should not trim vim.NIL from the end of a list', function() @@ -5681,37 +5685,27 @@ describe('LSP', function() describe('cmd', function() it('connects to lsp server via rpc.connect using ip address', function() exec_lua(create_tcp_echo_server) - local result = exec_lua(function() - local server, port, last_message = _G._create_tcp_server('127.0.0.1') + exec_lua(function() + local port = _G._create_tcp_server('127.0.0.1') vim.lsp.start({ name = 'dummy', cmd = vim.lsp.rpc.connect('127.0.0.1', port) }) - vim.wait(1000, function() - return last_message() ~= nil - end) - local init = last_message() - assert(init, 'server must receive `initialize` request') - server:close() - server:shutdown() - return vim.json.decode(init) end) - eq('initialize', result.method) + verify_single_notification(function(method, args) ---@param args [string] + eq('body', method) + eq('initialize', vim.json.decode(args[1]).method) + end) end) it('connects to lsp server via rpc.connect using hostname', function() skip(is_os('bsd'), 'issue with host resolution in ci') exec_lua(create_tcp_echo_server) - local result = exec_lua(function() - local server, port, last_message = _G._create_tcp_server('::1') + exec_lua(function() + local port = _G._create_tcp_server('::1') vim.lsp.start({ name = 'dummy', cmd = vim.lsp.rpc.connect('localhost', port) }) - vim.wait(1000, function() - return last_message() ~= nil - end) - local init = last_message() - assert(init, 'server must receive `initialize` request') - server:close() - server:shutdown() - return vim.json.decode(init) end) - eq('initialize', result.method) + verify_single_notification(function(method, args) ---@param args [string] + eq('body', method) + eq('initialize', vim.json.decode(args[1]).method) + end) end) it('can connect to lsp server via pipe or domain_socket', function()