neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

commit a6252c6683cccdd8ed56fa4ad70df9ea606e1498
parent 16c8a908ef675feeca459ba42e4ed04e181ac89f
Author: Yochem van Rosmalen <git@yochem.nl>
Date:   Tue, 10 Feb 2026 13:43:17 +0100

refactor(help): move escaping logic to Lua #37757

Problem:
The escaping logic for {subject} in the Ex command `:help {subject}` is
implemented in a messy 200+ line C function, which is hard to maintain and improve.

Solution:
Rewrite in Lua. Use `string.gsub()` instead of looping over characters
to improve clarity, and add many more tests so that the current code can
be improved confidently later on.
Diffstat:
A runtime/lua/vim/_core/help.lua | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/nvim/help.c | 232 ++++++-------------------------------------------------------------------------
M test/functional/core/main_spec.lua | 1 +
M test/functional/ex_cmds/help_spec.lua | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 255 insertions(+), 217 deletions(-)

diff --git a/runtime/lua/vim/_core/help.lua b/runtime/lua/vim/_core/help.lua @@ -0,0 +1,131 @@ +local M = {} + +local tag_exceptions = { + -- Interpret asterisk (star, '*') literal but name it 'star' + ['*'] = 'star', + ['g*'] = 'gstar', + ['[*'] = '[star', + [']*'] = ']star', + [':*'] = ':star', + ['/*'] = '/star', + ['/\\*'] = '/\\\\star', + ['\\\\star'] = '/\\\\star', + ['"*'] = 'quotestar', + ['**'] = 'starstar', + ['cpo-*'] = 'cpo-star', + + -- Literal question mark '?' + ['?'] = '?', + ['??'] = '??', + [':?'] = ':?', + ['?<CR>'] = '?<CR>', + ['g?'] = 'g?', + ['g?g?'] = 'g?g?', + ['g??'] = 'g??', + ['-?'] = '-?', + ['q?'] = 'q?', + ['v_g?'] = 'v_g?', + ['/\\?'] = '/\\\\?', + + -- Backslash-escaping hell + ['/\\%(\\)'] = '/\\\\%(\\\\)', + ['/\\z(\\)'] = '/\\\\z(\\\\)', + ['\\='] = '\\\\=', + ['\\%$'] = '/\\\\%\\$', + + -- Some expressions are literal but without the 'expr-' prefix. Note: not all 'expr-' subjects! + ['expr-!=?'] = '!=?', + ['expr-!~?'] = '!\\~?', + ['expr-<=?'] = '<=?', + ['expr-<?'] = '<?', + ['expr-==?'] = '==?', + ['expr-=~?'] = '=~?', + ['expr->=?'] = '>=?', + ['expr->?'] = '>?', + ['expr-is?'] = 'is?', + ['expr-isnot?'] = 'isnot?', +} + +---Transform a help tag query into a search pattern for find_tags(). +--- +---This function converts user input from `:help {subject}` into a regex pattern that balances +---literal matching with wildcard support. Vim help tags can contain characters that have special +---meaning in regex (like *, ?, |), but we also want to support wildcard searches. 
+--- +---Examples: +--- '*' --> 'star' (literal match for the * command help tag) +--- 'buffer*' --> 'buffer.*' (wildcard: find all buffer-related tags) +--- 'CTRL-W' --> stays as 'CTRL-W' (already in tag format) +--- '^A' --> 'CTRL-A' (caret notation converted to tag format) +--- +---@param word string The help subject as entered by the user +---@return string pattern The escaped regex pattern to search for in tag files +function M.escape_subject(word) + local replacement = tag_exceptions[word] + if replacement then + return replacement + end + + -- Add prefix '/\\' to patterns starting with a backslash + -- Examples: \S, \%^, \%(, \zs, \z1, \@<, \@=, \@<=, \_$, \_^ + if word:match([[^\.$]]) or word:match('^\\[%%_z@]') then + word = [[/\]] .. word + word = word:gsub('[$.~]', [[\%0]]) + word = word:gsub('|', 'bar') + else + -- Fix for bracket expressions and curly braces: + -- '\' --> '\\' (needs to come first) + -- '[' --> '\[' (escape the opening bracket) + -- ':[' --> ':\[' (escape the opening bracket) + -- '\{' --> '\\{' (for '\{' pattern matching) + -- '(' --> '' (parentheses around option tags should be ignored) + word = word:gsub([[\+]], [[\\]]) + word = word:gsub([[^%[]], [[\[]]) + word = word:gsub([[^:%[]], [[:\[]]) + word = word:gsub([[^\{]], [[\\{]]) + word = word:gsub([[^%(']], [[']]) + + word = word:gsub('|', 'bar') + word = word:gsub([["]], 'quote') + word = word:gsub('[$.~]', [[\%0]]) + word = word:gsub('%*', '.*') + word = word:gsub('?', '.') + + -- Handle control characters. + -- First convert raw control chars to the caret notation + -- E.g. 0x01 --> '^A' etc. + ---@type string + word = word:gsub('([\1-\31])', function(ctrl_char) + -- '^\' needs an extra backslash + local repr = string.char(ctrl_char:byte() + 64):gsub([[\]], [[\\]]) + return '^' .. repr + end) + + -- Change caret notation to 'CTRL-', except '^_' + -- E.g. 'i^G^J' --> 'iCTRL-GCTRL-J' + word = word:gsub('%^([^_])', 'CTRL-%1') + -- Add underscores around 'CTRL-X' characters + -- E.g. 
'iCTRL-GCTRL-J' --> 'i_CTRL-G_CTRL-J' + -- Only exception: 'CTRL-{character}' + word = word:gsub('([^_])CTRL%-', '%1_CTRL-') + word = word:gsub('(CTRL%-[^{])([^_\\])', '%1_%2') + + -- Skip function arguments + -- E.g. 'abs({expr})' --> 'abs' + -- E.g. 'abs([arg])' --> 'abs' + word = word:gsub('%({.*', '') + word = word:gsub('%(%[.*', '') + + -- Skip punctuation after second apostrophe/curly brace + -- E.g. ''option',' --> ''option'' + -- E.g. '{address},' --> '{address}' + -- E.g. '`command`,' --> 'command' (backticks are removed too, but '``' stays '``') + word = word:gsub([[^'([^']*)'.*]], [['%1']]) + word = word:gsub([[^{([^}]*)}.*]], '{%1}') + word = word:gsub([[^`([^`]+)`.*]], '%1') + end + + return word +end + +return M diff --git a/src/nvim/help.c b/src/nvim/help.c @@ -24,6 +24,7 @@ #include "nvim/gettext_defs.h" #include "nvim/globals.h" #include "nvim/help.h" +#include "nvim/lua/executor.h" #include "nvim/macros_defs.h" #include "nvim/mark.h" #include "nvim/mbyte.h" @@ -314,227 +315,24 @@ static int help_compare(const void *s1, const void *s2) /// When "keep_lang" is true try keeping the language of the current buffer. int find_help_tags(const char *arg, int *num_matches, char ***matches, bool keep_lang) { - // Specific tags that either have a specific replacement or won't go - // through the generic rules. - static char *(except_tbl[][2]) = { - { "*", "star" }, - { "g*", "gstar" }, - { "[*", "[star" }, - { "]*", "]star" }, - { ":*", ":star" }, - { "/*", "/star" }, // NOLINT - { "/\\*", "/\\\\star" }, - { "\"*", "quotestar" }, - { "**", "starstar" }, - { "cpo-*", "cpo-star" }, - { "/\\(\\)", "/\\\\(\\\\)" }, - { "/\\%(\\)", "/\\\\%(\\\\)" }, - { "?", "?" }, - { "??", "??" }, - { ":?", ":?" }, - { "?<CR>", "?<CR>" }, - { "g?", "g?" }, - { "g?g?", "g?g?" }, - { "g??", "g??" }, - { "-?", "-?" }, - { "q?", "q?" }, - { "v_g?", "v_g?" }, - { "/\\?", "/\\\\?" 
}, - { "/\\z(\\)", "/\\\\z(\\\\)" }, - { "\\=", "\\\\=" }, - { ":s\\=", ":s\\\\=" }, - { "[count]", "\\[count]" }, - { "[quotex]", "\\[quotex]" }, - { "[range]", "\\[range]" }, - { ":[range]", ":\\[range]" }, - { "[pattern]", "\\[pattern]" }, - { "\\|", "\\\\bar" }, - { "\\%$", "/\\\\%\\$" }, - { "s/\\~", "s/\\\\\\~" }, - { "s/\\U", "s/\\\\U" }, - { "s/\\L", "s/\\\\L" }, - { "s/\\1", "s/\\\\1" }, - { "s/\\2", "s/\\\\2" }, - { "s/\\3", "s/\\\\3" }, - { "s/\\9", "s/\\\\9" }, - { NULL, NULL } - }; - - static const char *(expr_table[]) = { - "!=?", "!~?", "<=?", "<?", "==?", "=~?", - ">=?", ">?", "is?", "isnot?" - }; - char *d = IObuff; // assume IObuff is long enough! - d[0] = NUL; - - if (STRNICMP(arg, "expr-", 5) == 0) { - // When the string starting with "expr-" and containing '?' and matches - // the table, it is taken literally (but ~ is escaped). Otherwise '?' - // is recognized as a wildcard. - for (int i = (int)ARRAY_SIZE(expr_table); --i >= 0;) { - if (strcmp(arg + 5, expr_table[i]) == 0) { - for (int si = 0, di = 0;; si++) { - if (arg[si] == '~') { - d[di++] = '\\'; - } - d[di++] = arg[si]; - if (arg[si] == NUL) { - break; - } - } - break; - } - } - } else { - // Recognize a few exceptions to the rule. Some strings that contain - // '*'are changed to "star", otherwise '*' is recognized as a wildcard. - for (int i = 0; except_tbl[i][0] != NULL; i++) { - if (strcmp(arg, except_tbl[i][0]) == 0) { - STRCPY(d, except_tbl[i][1]); - break; - } - } - } + Error err = ERROR_INIT; + MAXSIZE_TEMP_ARRAY(args, 1); - if (d[0] == NUL) { // no match in table - // Replace "\S" with "/\\S", etc. Otherwise every tag is matched. - // Also replace "\%^" and "\%(", they match every tag too. - // Also "\zs", "\z1", etc. - // Also "\@<", "\@=", "\@<=", etc. - // And also "\_$" and "\_^". 
- if (arg[0] == '\\' - && ((arg[1] != NUL && arg[2] == NUL) - || (vim_strchr("%_z@", (uint8_t)arg[1]) != NULL - && arg[2] != NUL))) { - vim_snprintf(d, IOSIZE, "/\\\\%s", arg + 1); - // Check for "/\\_$", should be "/\\_\$" - if (d[3] == '_' && d[4] == '$') { - STRCPY(d + 4, "\\$"); - } - } else { - // Replace: - // "[:...:]" with "\[:...:]" - // "[++...]" with "\[++...]" - // "\{" with "\\{" -- matching "} \}" - if ((arg[0] == '[' && (arg[1] == ':' - || (arg[1] == '+' && arg[2] == '+'))) - || (arg[0] == '\\' && arg[1] == '{')) { - *d++ = '\\'; - } + ADD_C(args, CSTR_AS_OBJ(arg)); - // If tag starts with "('", skip the "(". Fixes CTRL-] on ('option'. - if (*arg == '(' && arg[1] == '\'') { - arg++; - } - for (const char *s = arg; *s; s++) { - // Replace "|" with "bar" and '"' with "quote" to match the name of - // the tags for these commands. - // Replace "*" with ".*" and "?" with "." to match command line - // completion. - // Insert a backslash before '~', '$' and '.' to avoid their - // special meaning. - if (d - IObuff > IOSIZE - 10) { // getting too long!? - break; - } - switch (*s) { - case '|': - STRCPY(d, "bar"); - d += 3; - continue; - case '"': - STRCPY(d, "quote"); - d += 5; - continue; - case '*': - *d++ = '.'; - break; - case '?': - *d++ = '.'; - continue; - case '$': - case '.': - case '~': - *d++ = '\\'; - break; - } + Object res = NLUA_EXEC_STATIC("return require'vim._core.help'.escape_subject(...)", + args, kRetObject, NULL, &err); - // Replace "^x" by "CTRL-X". Don't do this for "^_" to make - // ":help i_^_CTRL-D" work. - // Insert '-' before and after "CTRL-X" when applicable. 
- if ((uint8_t)(*s) < ' ' - || (*s == '^' && s[1] - && (ASCII_ISALPHA(s[1]) || vim_strchr("?@[\\]^", (uint8_t)s[1]) != NULL))) { - if (d > IObuff && d[-1] != '_' && d[-1] != '\\') { - *d++ = '_'; // prepend a '_' to make x_CTRL-x - } - STRCPY(d, "CTRL-"); - d += 5; - if (*s < ' ') { - *d++ = (char)(*s + '@'); - if (d[-1] == '\\') { - *d++ = '\\'; // double a backslash - } - } else { - *d++ = *++s; - } - if (s[1] != NUL && s[1] != '_') { - *d++ = '_'; // append a '_' - } - continue; - } else if (*s == '^') { // "^" or "CTRL-^" or "^_" - *d++ = '\\'; - } else if (s[0] == '\\' && s[1] != '\\' && *arg == '/' && s == arg + 1) { - // Insert a backslash before a backslash after a slash, for search - // pattern tags: "/\|" --> "/\\|". - *d++ = '\\'; - } - - // "CTRL-\_" -> "CTRL-\\_" to avoid the special meaning of "\_" in - // "CTRL-\_CTRL-N" - if (STRNICMP(s, "CTRL-\\_", 7) == 0) { - STRCPY(d, "CTRL-\\\\"); - d += 7; - s += 6; - } - - *d++ = *s; - - // If tag contains "({" or "([", tag terminates at the "(". - // This is for help on functions, e.g.: abs({expr}). - if (*s == '(' && (s[1] == '{' || s[1] == '[')) { - break; - } - - // If tag starts with ', toss everything after a second '. Fixes - // CTRL-] on 'option'. (would include the trailing '.'). - if (*s == '\'' && s > arg && *arg == '\'') { - break; - } - // Also '{' and '}'. Fixes CTRL-] on '{address}'. - if (*s == '}' && s > arg && *arg == '{') { - break; - } - } - *d = NUL; - - if (*IObuff == '`') { - if (d > IObuff + 2 && d[-1] == '`') { - // remove the backticks from `command` - memmove(IObuff, IObuff + 1, strlen(IObuff)); - d[-2] = NUL; - } else if (d > IObuff + 3 && d[-2] == '`' && d[-1] == ',') { - // remove the backticks and comma from `command`, - memmove(IObuff, IObuff + 1, strlen(IObuff)); - d[-3] = NUL; - } else if (d > IObuff + 4 && d[-3] == '`' - && d[-2] == '\\' && d[-1] == '.') { - // remove the backticks and dot from `command`\. 
- memmove(IObuff, IObuff + 1, strlen(IObuff)); - d[-4] = NUL; - } - } - } + if (ERROR_SET(&err)) { + emsg_multiline(err.msg, "lua_error", HLF_E, true); + api_clear_error(&err); + return FAIL; } + api_clear_error(&err); + + assert(res.type == kObjectTypeString); + xstrlcpy(IObuff, res.data.string.data, sizeof(IObuff)); + api_free_object(res); *matches = NULL; *num_matches = 0; diff --git a/test/functional/core/main_spec.lua b/test/functional/core/main_spec.lua @@ -222,6 +222,7 @@ describe('vim._core', function() 'vim._core.editor', 'vim._core.ex_cmd', 'vim._core.exrc', + 'vim._core.help', 'vim._core.options', 'vim._core.server', 'vim._core.shared', diff --git a/test/functional/ex_cmds/help_spec.lua b/test/functional/ex_cmds/help_spec.lua @@ -13,6 +13,114 @@ local write_file = t.write_file describe(':help', function() before_each(clear) + it('{subject}', function() + n.command('helptags ++t $VIMRUNTIME/doc') + local function check_tag(cmd, tag) + local cmd_ok = t.pcall(n.command, cmd) + local found = n.api.nvim_get_current_line():find(tag, 1, true) + local errmsg = (not cmd_ok and 'command failed') or (not found and 'tag not found') or '?' 
+ assert( + cmd_ok and found, + string.format('Expected `:%s` to jump to tag `%s`, but %s', cmd, tag, errmsg) + ) + n.command('helpclose') + end + + check_tag('help', '*help.txt*') + check_tag('help |', '*bar*') + check_tag('help "*', '*quotestar*') + check_tag('help ch??khealth', '*:checkhealth*') + + check_tag([[help \\star]], [[*/\star*]]) + check_tag('help /*', [[*/\star*]]) + check_tag('help ?', '*?*') + check_tag('help ??', '*??*') + check_tag('help expr-!=?', '*expr-!=?*') + + check_tag('help /<cr>', '*/<CR>*') + check_tag([[help %(\\)]], [[*/\%(\)*]]) + check_tag('help %^', [[/\%^]]) + check_tag('help /_^G', '/_CTRL-G') + check_tag([[help \0]], [[\0]]) + + check_tag('help !', '*!*') + check_tag('help #{}', '*#{}*') + check_tag('help %:8', '*%:8*') + check_tag('help &', '*&*') + check_tag([[help '']], [[*''*]]) + check_tag([[help '(]], [[*'(*]]) + check_tag([[help '0]], [[*'0*]]) + check_tag([[help 'ac']], [[*'ac'*]]) + check_tag([[help '{]], [[*'{*]]) + check_tag('help )', '*)*') + check_tag('help +', '*+*') + + check_tag('help +opt', '*++opt*') + check_tag('help --', '*--*') + check_tag('help -?', '*-?*') + check_tag('help .', '*.*') + check_tag('help :', '*:*') + check_tag([[help :'}]], [[*:'}*]]) + check_tag('help :,', '*:,*') + check_tag('help :<abuf>', '*:<abuf>*') + check_tag([[help :\|]], [[*:\bar*]]) + check_tag([[help :\\|]], [[*:\bar*]]) + check_tag('help _', '*_*') + check_tag('help `', '*`*') + check_tag('help `(', '*`(*') + check_tag([[help `:ls`.]], [[*:ls*]]) + + check_tag('help [', '*[*') + check_tag('help [#', '*[#*') + check_tag([[help [']], [[*['*]]) + check_tag('help [(', '*[(*') + check_tag('help [++opt]', '*[++opt]*') + check_tag('help [:tab:]', '*[:tab:]*') + check_tag('help [count]', '*[count]*') + check_tag('help :[range]', '*:[range]*') + check_tag('help [<space>', '[<Space>') + check_tag('help ]_^D', ']_CTRL-D') + + check_tag([[help $HOME]], [[*$HOME*]]) + + check_tag('help <C-pagedown>', '*CTRL-<PageDown>*') + check_tag('help 
^A', '*CTRL-A*') + check_tag('help ^W_+', '*CTRL-W_+*') + check_tag('help ^W<up>', '*CTRL-W_<Up>*') + check_tag('help ^W>', '*CTRL-W_>*') + check_tag('help ^W^]', '*CTRL-W_CTRL-]*') + check_tag('help ^W^', '*CTRL-W_^*') + check_tag('help ^W|', '*CTRL-W_bar*') + check_tag('help ^Wg<tab>', '*CTRL-W_g<Tab>*') + check_tag('help ^]', '*CTRL-]*') + check_tag('help ^{char}', 'CTRL-{char}') + check_tag('help [^L', '[_CTRL-L') + check_tag('help <C-', '*<C-*') + check_tag('help <S-CR>', '*<S-CR>*') + check_tag('help <<', '*<<*') + check_tag('help <>', '*<>*') + check_tag([[help i^x^y]], '*i_CTRL-X_CTRL-Y*') + check_tag([[help CTRL-\_CTRL-N]], [[*CTRL-\_CTRL-N*]]) + + check_tag([[exe "help i\<C-\>\<C-G>"]], [[*i_CTRL-\_CTRL-G*]]) + check_tag([[exe "help \<C-V>"]], '*CTRL-V*') + check_tag([[exe "help! arglistid([{winnr})"]], '*arglistid()*') + check_tag([[exe "help! 'autoindent'."]], [[*'autoindent'*]]) + + check_tag('exusage', '*:index*') + check_tag('viusage', '*normal-index*') + + -- Test cases for removed exceptions + check_tag('help /\\(\\)', '*/\\(\\)*') + check_tag('help :s\\=', '*:s\\=*') + check_tag([[help expr-']], [[*expr-'*]]) + check_tag('help expr-barbar', '*expr-barbar*') + check_tag([[help s/\9]], [[*s/\9*]]) + check_tag([[help s/\U]], [[*s/\U*]]) + check_tag([[help s/\~]], [[*s/\~*]]) + check_tag([[help \|]], [[*/\bar*]]) + end) + it('window closed makes cursor return to a valid win/buf #9773', function() n.add_builddir_to_rtp() command('help help')