commit 654303079baa9641a6fb50787e7f525ed64e6f40
parent f0294418d65d005d885eabf80eecc7255358b95e
Author: tao <2471314@gmail.com>
Date: Mon, 17 Nov 2025 09:23:52 +0800
feat(lsp): skip invalid header lines #36402
Problem:
Some servers write log output to stdout, and there is no way to prevent it.
See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828
Solution:
Extract the `Content-Length` field byte by byte and skip invalid
lines with a simple state machine (name/colon/value/invalid), which has minimal
performance impact.
I chose byte-by-byte parsing over Lua patterns here. Although it is a bit more complex,
it gives more stable performance and allows more accurate error messages when
needed.
Here is a bench result and script:
parse header1 by pattern: 59.52377ms 45
parse header1 by byte: 7.531128ms 45
parse header2 by pattern: 26.06936ms 45
parse header2 by byte: 5.235724ms 45
parse header3 by pattern: 9.348495ms 45
parse header3 by byte: 3.452389ms 45
parse header4 by pattern: 9.73156ms 45
parse header4 by byte: 3.638386ms 45
Script:
```lua
local strbuffer = require('string.buffer')
--- Look up the `Content-Length` value with Lua patterns, one header line at a time.
---
--- Lines are split on `\n` (an optional preceding `\r` is dropped). Scanning stops at the
--- first empty line; lines that are not `name: digits` fields are skipped.
--- @param header string
--- @return number
local function get_content_length(header)
  for field in string.gmatch(header, '(.-)\r?\n') do
    if field == '' then
      -- An empty line ends the scan.
      break
    end
    local name, digits = string.match(field, '^%s*(%S+)%s*:%s*(%d+)%s*$')
    if name ~= nil and string.lower(name) == 'content-length' then
      return assert(tonumber(digits))
    end
  end
  error('Content-Length not found in header: ' .. header)
end
--- Extract the `Content-Length` value by scanning the header byte by byte.
---
--- A small state machine walks over `header`:
---   * `name`    - matching the field name case-insensitively (leading OWS is tolerated),
---   * `colon`   - skipping OWS (space / horizontal tab) after the colon,
---   * `value`   - scanning the field value up to the terminating `\r\n`,
---   * `invalid` - discarding the rest of a line that is not a `Content-Length` field.
---
--- Raises an error if the value is not a number or if no `Content-Length` field is found.
--- @param header string
--- @return number
local function get_content_length_by_byte(header)
  local name = 'content-length'
  local name_end = #name + 1 -- value of `j` once the whole name has been matched
  local state = 'name'
  local j = 1 -- index of the next expected byte of `name`
  local value_start = 1 -- position of the first byte of the field value
  local digit = true -- false once the value contains anything but digits/OWS
  local i, len = 1, #header
  while i <= len do
    local c = header:byte(i)
    if state == 'name' then
      if c >= 65 and c <= 90 then -- lower case
        c = c + 32
      end
      if c == 10 then
        -- The line ended while still matching the name (blank line, whitespace-only line
        -- or a prefix of the name). Restart on the next line; switching to `invalid`
        -- here would wrongly discard the whole next line.
        j = 1
      elseif (c == 32 or c == 9) and j == 1 then
        -- skip OWS for compatibility only
      elseif c == name:byte(j) then
        j = j + 1
      elseif c == 58 and j == name_end then
        state = 'colon'
      else
        state = 'invalid'
      end
    elseif state == 'colon' then
      if c ~= 32 and c ~= 9 then -- skip OWS normally
        state = 'value'
        value_start = i
        i = i - 1 -- re-read this byte in the `value` state
      end
    elseif state == 'value' then
      if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
        local value = header:sub(value_start, i - 1)
        local n = digit and tonumber(value)
        if n then
          return n
        end
        -- Level 0 keeps the message free of position info, as `assert` did.
        error('value of Content-Length is not number: ' .. value, 0)
      end
      if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
        digit = false
      end
    elseif state == 'invalid' then
      if c == 10 then -- reset for next line
        state, j = 'name', 1
      end
    end
    i = i + 1
  end
  error('Content-Length not found in header: ' .. header)
end
--- Run `fn(header)` `count` times and print the label, the elapsed wall time in
--- milliseconds and the result of the last call.
--- @param label string
--- @param header string
--- @param fn fun(header: string): number
--- @param count integer
local function bench(label, header, fn, count)
  local result --- @type number
  local t0 = vim.uv.hrtime()
  for _ = 1, count do
    result = fn(header)
  end
  local t1 = vim.uv.hrtime()
  -- hrtime() is in nanoseconds; convert the difference to milliseconds.
  print(label .. ':', (t1 - t0) / 1e6 .. 'ms', result)
end
-- Benchmark inputs as `{ tag, header }` pairs, listed in the order they are measured.
local cases = {
  -- header starting with log lines
  {
    'header1',
    'WARN: no common words file defined for Khmer - this language might not be correctly auto-detected\nWARN: no common words file defined for Japanese - this language might not be correctly auto-detected\nContent-Length: 45 \r\n\r\n',
  },
  -- header starting with content-type
  { 'header2', 'Content-Type: application/json-rpc; charset=utf-8\r\nContent-Length: 45 \r\n' },
  -- regular header
  { 'header3', ' Content-Length: 45\r\n' },
  -- regular header ending with content-type
  {
    'header4',
    ' Content-Length: 45 \r\nContent-Type: application/json-rpc; charset=utf-8\r\n',
  },
}
local iterations = 10000
for _, case in ipairs(cases) do
  local tag, header = case[1], case[2]
  -- Collect garbage before each measurement so earlier runs do not skew the timing.
  collectgarbage('collect')
  bench('parse ' .. tag .. ' by pattern', header, get_content_length, iterations)
  collectgarbage('collect')
  bench('parse ' .. tag .. ' by byte', header, get_content_length_by_byte, iterations)
end
```
Also, I removed an outdated test
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L1950
and tweaked the boilerplate in two other tests for reusability while keeping the final assertions the same.
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L5704
https://github.com/neovim/neovim/blob/accd392f4d14a114e378f84dc15cb24bc34a370a/test/functional/plugin/lsp_spec.lua#L5721
Diffstat:
5 files changed, 149 insertions(+), 103 deletions(-)
diff --git a/runtime/lua/vim/_stringbuffer.lua b/runtime/lua/vim/_stringbuffer.lua
@@ -77,9 +77,10 @@ function StrBuffer:set(str)
return self:reset():put(str)
end
---- @param n integer
+--- @param n? integer
--- @return string
function StrBuffer:get(n)
+ n = n or self.len
local r = self:_peak(n)
self:skip(n)
return r
diff --git a/runtime/lua/vim/lsp/rpc.lua b/runtime/lua/vim/lsp/rpc.lua
@@ -1,6 +1,7 @@
local log = require('vim.lsp.log')
local protocol = require('vim.lsp.protocol')
local lsp_transport = require('vim.lsp._transport')
+local strbuffer = require('vim._stringbuffer')
local validate, schedule_wrap = vim.validate, vim.schedule_wrap
--- Embeds the given string into a table and correctly computes `Content-Length`.
@@ -16,19 +17,59 @@ local function format_message_with_content_length(message)
})
end
---- Extract content-length from the header
+--- Extract content-length from the header.
+---
+--- The structure of header fields conforms to the [HTTP semantic](https://tools.ietf.org/html/rfc7230#section-3.2).
+--- i.e., `header-field = field-name : OWS field-value OWS`,
+--- OWS means optional whitespace (Space/Horizontal Tab).
---
+--- we ignore lines ending with `\n` that don't contain `content-length`, since some servers
+--- write log to stdout and there's no way to avoid it.
+--- See https://github.com/neovim/neovim/pull/35743#pullrequestreview-3379705828
--- @param header string The header to parse
--- @return integer
local function get_content_length(header)
- for line in header:gmatch('(.-)\r\n') do
- if line == '' then
- break
- end
- local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
- if key and key:lower() == 'content-length' then
- return assert(tonumber(value))
+ local state = 'name'
+ local i, len = 1, #header
+ local j, name = 1, 'content-length'
+ local buf = strbuffer.new()
+ local digit = true
+ while i <= len do
+ local c = header:byte(i)
+ if state == 'name' then
+ if c >= 65 and c <= 90 then -- lower case
+ c = c + 32
+ end
+ if (c == 32 or c == 9) and j == 1 then -- luacheck: ignore 542
+ -- skip OWS for compatibility only
+ elseif c == name:byte(j) then
+ j = j + 1
+ elseif c == 58 and j == 15 then
+ state = 'colon'
+ else
+ state = 'invalid'
+ end
+ elseif state == 'colon' then
+ if c ~= 32 and c ~= 9 then -- skip OWS normally
+ state = 'value'
+ i = i - 1
+ end
+ elseif state == 'value' then
+ if c == 13 and header:byte(i + 1) == 10 then -- must end with \r\n
+ local value = buf:get()
+ return assert(digit and tonumber(value), 'value of Content-Length is not number: ' .. value)
+ else
+ buf:put(string.char(c))
+ end
+ if c < 48 and c ~= 32 and c ~= 9 or c > 57 then
+ digit = false
+ end
+ elseif state == 'invalid' then
+ if c == 10 then -- reset for next line
+ state, j = 'name', 1
+ end
end
+ i = i + 1
end
error('Content-Length not found in header: ' .. header)
end
@@ -149,8 +190,6 @@ local default_dispatchers = {
end,
}
-local strbuffer = require('vim._stringbuffer')
-
--- @async
local function request_parser_loop()
local buf = strbuffer.new()
diff --git a/test/functional/fixtures/fake-lsp-server.lua b/test/functional/fixtures/fake-lsp-server.lua
@@ -755,10 +755,6 @@ function tests.basic_check_buffer_open_and_change_incremental_editing()
}
end
-function tests.invalid_header()
- io.stdout:write('Content-length: \r\n')
-end
-
function tests.decode_nil()
skeleton {
on_init = function(_)
diff --git a/test/functional/plugin/lsp/testutil.lua b/test/functional/plugin/lsp/testutil.lua
@@ -24,27 +24,43 @@ end
M.create_tcp_echo_server = function()
--- Create a TCP server that echos the first message it receives.
--- @param host string
- ---@return uv.uv_tcp_t
- ---@return integer
- ---@return fun():string|nil
+ --- @return integer
function _G._create_tcp_server(host)
local uv = vim.uv
local server = assert(uv.new_tcp())
- local init = nil
+ local on_read = require('vim.lsp.rpc').create_read_loop(
+ function(body)
+ vim.rpcnotify(1, 'body', body)
+ end,
+ nil,
+ function(err, code)
+ vim.rpcnotify(1, 'error', err, code)
+ end
+ )
server:bind(host, 0)
- server:listen(127, function(err)
- assert(not err, err)
+ server:listen(127, function(e)
+ assert(not e, e)
local socket = assert(uv.new_tcp())
server:accept(socket)
- socket:read_start(require('vim.lsp.rpc').create_read_loop(function(body)
- init = body
+ socket:read_start(function(err, chunk)
+ on_read(err, chunk)
+ socket:shutdown()
socket:close()
- end))
+ server:shutdown()
+ server:close()
+ end)
+ end)
+ return server:getsockname().port
+ end
+ function _G._send_msg_to_server(msg)
+ local port = _G._create_tcp_server('127.0.0.1')
+ local client = assert(vim.uv.new_tcp())
+ client:connect('127.0.0.1', port, function()
+ client:write(msg, function()
+ client:shutdown()
+ client:close()
+ end)
end)
- local port = server:getsockname().port
- return server, port, function()
- return init
- end
end
end
diff --git a/test/functional/plugin/lsp_spec.lua b/test/functional/plugin/lsp_spec.lua
@@ -65,6 +65,17 @@ local function apply_text_edits(edits, encoding)
end)
end
+--- @param notification_cb fun(method: 'body' | 'error', args: any)
+local function verify_single_notification(notification_cb)
+ local called = false
+ n.run(nil, function(method, args)
+ notification_cb(method, args)
+ stop()
+ called = true
+ end, nil, 1000)
+ eq(true, called)
+end
+
-- TODO(justinmk): hangs on Windows https://github.com/neovim/neovim/pull/11837
if skip(is_os('win')) then
return
@@ -1921,65 +1932,58 @@ describe('LSP', function()
end)
describe('parsing tests', function()
- it('should handle invalid content-length correctly', function()
- local expected_handlers = {
- { NIL, {}, { method = 'shutdown', client_id = 1 } },
- { NIL, {}, { method = 'finish', client_id = 1 } },
- { NIL, {}, { method = 'start', client_id = 1 } },
- }
- local client --- @type vim.lsp.Client
- test_rpc_server {
- test_name = 'invalid_header',
- on_setup = function() end,
- on_init = function(_client)
- client = _client
- client:stop(true)
- end,
- on_exit = function(code, signal)
- eq(0, code, 'exit code')
- eq(0, signal, 'exit signal')
- end,
- on_handler = function(err, result, ctx)
- eq(table.remove(expected_handlers), { err, result, ctx }, 'expected handler')
- end,
- }
+ local body = '{"jsonrpc":"2.0","id": 1,"method":"demo"}'
+
+ before_each(function()
+ exec_lua(create_tcp_echo_server)
end)
it('should catch error while parsing invalid header', function()
- local header = 'Content-Length: \r\n'
- local called = false
+ -- No whitespace is allowed between the header field-name and colon.
+ -- See https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.4
+ local field = 'Content-Length : 10 \r\n'
exec_lua(function()
- local server = assert(vim.uv.new_tcp())
- server:bind('127.0.0.1', 0)
- server:listen(1, function(e)
- assert(not e, e)
- local socket = assert(vim.uv.new_tcp())
- server:accept(socket)
- socket:write(header .. '\r\n', function()
- socket:shutdown()
- server:close()
- end)
- end)
- local client = assert(vim.uv.new_tcp())
- local on_read = require('vim.lsp.rpc').create_read_loop(function() end, function()
- client:close()
- end, function(err, code)
- vim.rpcnotify(1, 'error', err, code)
- end)
- client:connect('127.0.0.1', server:getsockname().port, function()
- client:read_start(on_read)
- end)
+ _G._send_msg_to_server(field .. '\r\n')
end)
- n.run(nil, function(method, args)
- local err, code = unpack(args) --- @type string, number
+ verify_single_notification(function(method, args) ---@param args [string, number]
eq('error', method)
- eq(1, code)
- matches(vim.pesc('Content-Length not found in header: ' .. header) .. '$', err)
- called = true
- stop()
- return NIL
- end, nil, 1000)
- eq(true, called)
+ eq(1, args[2])
+ matches(vim.pesc('Content-Length not found in header: ' .. field) .. '$', args[1])
+ end)
+ end)
+
+ it('value of Content-Length shoud be number', function()
+ local value = '123 foo'
+ exec_lua(function()
+ _G._send_msg_to_server('Content-Length: ' .. value .. '\r\n\r\n')
+ end)
+ verify_single_notification(function(method, args) ---@param args [string, number]
+ eq('error', method)
+ eq(1, args[2])
+ matches('value of Content%-Length is not number: ' .. value .. '$', args[1])
+ end)
+ end)
+
+ it('field name is case-insensitive', function()
+ exec_lua(function()
+ _G._send_msg_to_server('CONTENT-Length: ' .. #body .. ' \r\n\r\n' .. body)
+ end)
+ verify_single_notification(function(method, args) ---@param args [string]
+ eq('body', method)
+ eq(body, args[1])
+ end)
+ end)
+
+ it("ignore some lines ending with LF that don't contain content-length", function()
+ exec_lua(function()
+ _G._send_msg_to_server(
+ 'foo \n bar\nWARN: no common words.\nContent-Length: ' .. #body .. ' \r\n\r\n' .. body
+ )
+ end)
+ verify_single_notification(function(method, args) ---@param args [string]
+ eq('body', method)
+ eq(body, args[1])
+ end)
end)
it('should not trim vim.NIL from the end of a list', function()
@@ -5681,37 +5685,27 @@ describe('LSP', function()
describe('cmd', function()
it('connects to lsp server via rpc.connect using ip address', function()
exec_lua(create_tcp_echo_server)
- local result = exec_lua(function()
- local server, port, last_message = _G._create_tcp_server('127.0.0.1')
+ exec_lua(function()
+ local port = _G._create_tcp_server('127.0.0.1')
vim.lsp.start({ name = 'dummy', cmd = vim.lsp.rpc.connect('127.0.0.1', port) })
- vim.wait(1000, function()
- return last_message() ~= nil
- end)
- local init = last_message()
- assert(init, 'server must receive `initialize` request')
- server:close()
- server:shutdown()
- return vim.json.decode(init)
end)
- eq('initialize', result.method)
+ verify_single_notification(function(method, args) ---@param args [string]
+ eq('body', method)
+ eq('initialize', vim.json.decode(args[1]).method)
+ end)
end)
it('connects to lsp server via rpc.connect using hostname', function()
skip(is_os('bsd'), 'issue with host resolution in ci')
exec_lua(create_tcp_echo_server)
- local result = exec_lua(function()
- local server, port, last_message = _G._create_tcp_server('::1')
+ exec_lua(function()
+ local port = _G._create_tcp_server('::1')
vim.lsp.start({ name = 'dummy', cmd = vim.lsp.rpc.connect('localhost', port) })
- vim.wait(1000, function()
- return last_message() ~= nil
- end)
- local init = last_message()
- assert(init, 'server must receive `initialize` request')
- server:close()
- server:shutdown()
- return vim.json.decode(init)
end)
- eq('initialize', result.method)
+ verify_single_notification(function(method, args) ---@param args [string]
+ eq('body', method)
+ eq('initialize', vim.json.decode(args[1]).method)
+ end)
end)
it('can connect to lsp server via pipe or domain_socket', function()