commit a6252c6683cccdd8ed56fa4ad70df9ea606e1498
parent 16c8a908ef675feeca459ba42e4ed04e181ac89f
Author: Yochem van Rosmalen <git@yochem.nl>
Date: Tue, 10 Feb 2026 13:43:17 +0100
refactor(help): move escaping logic to Lua #37757
Problem:
Escaping logic for {subject} in the Ex command `:help {subject}` is done in a
messy 200+ line C function, which is hard to maintain and improve.
Solution:
Rewrite in Lua. Use `string.gsub()` instead of looping over characters
to improve clarity and add many more tests to be able to confidently
improve current code later on.
Diffstat:
4 files changed, 255 insertions(+), 217 deletions(-)
diff --git a/runtime/lua/vim/_core/help.lua b/runtime/lua/vim/_core/help.lua
@@ -0,0 +1,131 @@
+local M = {}
+
+-- Subjects that bypass the generic escaping rules in escape_subject().
+-- The lookup is an exact, case-sensitive match on the whole subject: when the user's input equals
+-- one of these keys, the mapped value is returned as-is and none of the gsub() rewrites run.
+-- Keys are the raw subject text; values are the pattern handed back to the caller.
+local tag_exceptions = {
+ -- Interpret asterisk (star, '*') literal but name it 'star'
+ -- (the generic rules would rewrite '*' to the wildcard '.*')
+ ['*'] = 'star',
+ ['g*'] = 'gstar',
+ ['[*'] = '[star',
+ [']*'] = ']star',
+ [':*'] = ':star',
+ ['/*'] = '/star',
+ ['/\\*'] = '/\\\\star',
+ ['\\\\star'] = '/\\\\star',
+ ['"*'] = 'quotestar',
+ ['**'] = 'starstar',
+ ['cpo-*'] = 'cpo-star',
+
+ -- Literal question mark '?'
+ -- (the generic rules would rewrite '?' to the single-character wildcard '.')
+ ['?'] = '?',
+ ['??'] = '??',
+ [':?'] = ':?',
+ ['?<CR>'] = '?<CR>',
+ ['g?'] = 'g?',
+ ['g?g?'] = 'g?g?',
+ ['g??'] = 'g??',
+ ['-?'] = '-?',
+ ['q?'] = 'q?',
+ ['v_g?'] = 'v_g?',
+ ['/\\?'] = '/\\\\?',
+
+ -- Backslash-escaping hell
+ -- (each backslash of the subject appears doubled in the returned pattern)
+ ['/\\%(\\)'] = '/\\\\%(\\\\)',
+ ['/\\z(\\)'] = '/\\\\z(\\\\)',
+ ['\\='] = '\\\\=',
+ ['\\%$'] = '/\\\\%\\$',
+
+ -- Some expressions are literal but without the 'expr-' prefix. Note: not all 'expr-' subjects!
+ -- Only the '?'-suffixed operators are listed; '~' is backslash-escaped in the value.
+ ['expr-!=?'] = '!=?',
+ ['expr-!~?'] = '!\\~?',
+ ['expr-<=?'] = '<=?',
+ ['expr-<?'] = '<?',
+ ['expr-==?'] = '==?',
+ ['expr-=~?'] = '=~?',
+ ['expr->=?'] = '>=?',
+ ['expr->?'] = '>?',
+ ['expr-is?'] = 'is?',
+ ['expr-isnot?'] = 'isnot?',
+}
+
+---Transform a help tag query into a search pattern for find_tags().
+---
+---This function converts user input from `:help {subject}` into a regex pattern that balances
+---literal matching with wildcard support. Vim help tags can contain characters that have special
+---meaning in regex (like *, ?, |), but we also want to support wildcard searches.
+---
+---Subjects listed in `tag_exceptions` are returned as mapped there, without further processing.
+---
+---Examples:
+---   '*'           --> 'star'     (literal match for the * command help tag)
+---   'buffer*'     --> 'buffer.*' (wildcard: find all buffer-related tags)
+---   'CTRL-W'      --> stays as 'CTRL-W' (already in tag format)
+---   '^A'          --> 'CTRL-A'   (caret notation converted to tag format)
+---   'abs({expr})' --> 'abs('     (argument list dropped, opening parenthesis kept)
+---
+---@param word string The help subject as entered by the user
+---@return string pattern The escaped regex pattern to search for in tag files
+function M.escape_subject(word)
+  local replacement = tag_exceptions[word]
+  if replacement then
+    return replacement
+  end
+
+  -- NOTE: string.gsub() returns (result, count); every `word = word:gsub(...)` below keeps only
+  -- the resulting string.
+
+  -- Add prefix '/\\' to patterns starting with a backslash
+  -- Examples: \S, \%^, \%(, \zs, \z1, \@<, \@=, \@<=, \_$, \_^
+  -- First pattern: a backslash followed by exactly one character.
+  -- Second pattern: a backslash followed by one of '%', '_', 'z', '@'.
+  if word:match([[^\.$]]) or word:match('^\\[%%_z@]') then
+    word = [[/\]] .. word
+    word = word:gsub('[$.~]', [[\%0]])
+    word = word:gsub('|', 'bar')
+  else
+    -- Fix for bracket expressions and parentheses:
+    --   '\'   --> '\\'   (needs to come first; a run of backslashes becomes exactly two, so a
+    --                     leading '\{' is already '\\{' after this step)
+    --   '['   --> '\['   (escape the opening bracket)
+    --   ':['  --> ':\['  (escape the opening bracket)
+    --   "('"  --> "'"    (parentheses around option tags should be ignored)
+    word = word:gsub([[\+]], [[\\]])
+    word = word:gsub([[^%[]], [[\[]])
+    word = word:gsub([[^:%[]], [[:\[]])
+    word = word:gsub([[^%(']], [[']])
+
+    -- Rename or escape characters that are special in the tag regex. '.' must be escaped before
+    -- the wildcards below introduce new, unescaped dots.
+    word = word:gsub('|', 'bar')
+    word = word:gsub([["]], 'quote')
+    word = word:gsub('[$.~]', [[\%0]])
+    word = word:gsub('%*', '.*')
+    -- '?' is a quantifier in Lua patterns; escape it to match a literal question mark.
+    word = word:gsub('%?', '.')
+
+    -- Handle control characters.
+    -- First convert raw control chars to the caret notation
+    -- E.g. 0x01 --> '^A' etc.
+    ---@type string
+    word = word:gsub('([\1-\31])', function(ctrl_char)
+      local byte = ctrl_char:byte()
+      if byte == 31 then
+        -- 0x1F would become '^_', which the caret conversion below deliberately leaves alone
+        -- (that exception is meant for a typed '^_'). Emit the tag form directly so a raw
+        -- CTRL-_ still resolves to 'CTRL-_'.
+        return 'CTRL-_'
+      end
+      -- '^\' needs an extra backslash
+      local repr = string.char(byte + 64):gsub([[\]], [[\\]])
+      return '^' .. repr
+    end)
+
+    -- Change caret notation to 'CTRL-', except '^_'
+    -- E.g. 'i^G^J' --> 'iCTRL-GCTRL-J'
+    word = word:gsub('%^([^_])', 'CTRL-%1')
+    -- Add underscores around 'CTRL-X' characters
+    -- E.g. 'iCTRL-GCTRL-J' --> 'i_CTRL-G_CTRL-J'
+    -- Only exception: 'CTRL-{character}'
+    -- No underscore is appended when the next character is '_' or a backslash.
+    word = word:gsub('([^_])CTRL%-', '%1_CTRL-')
+    word = word:gsub('(CTRL%-[^{])([^_\\])', '%1_%2')
+
+    -- Skip function arguments, but keep the opening parenthesis so that the pattern only matches
+    -- function tags (e.g. 'count(' cannot match a plain 'count' tag).
+    -- E.g. 'abs({expr})' --> 'abs('
+    -- E.g. 'abs([arg])' --> 'abs('
+    word = word:gsub('%({.*', '(')
+    word = word:gsub('%(%[.*', '(')
+
+    -- Skip punctuation after second apostrophe/curly brace
+    -- E.g. ''option',' --> ''option''
+    -- E.g. '{address},' --> '{address}'
+    -- E.g. '`command`,' --> 'command' (backticks are removed too, but '``' stays '``')
+    word = word:gsub([[^'([^']*)'.*]], [['%1']])
+    word = word:gsub([[^{([^}]*)}.*]], '{%1}')
+    word = word:gsub([[^`([^`]+)`.*]], '%1')
+  end
+
+  return word
+end
+
+return M
diff --git a/src/nvim/help.c b/src/nvim/help.c
@@ -24,6 +24,7 @@
#include "nvim/gettext_defs.h"
#include "nvim/globals.h"
#include "nvim/help.h"
+#include "nvim/lua/executor.h"
#include "nvim/macros_defs.h"
#include "nvim/mark.h"
#include "nvim/mbyte.h"
@@ -314,227 +315,24 @@ static int help_compare(const void *s1, const void *s2)
/// When "keep_lang" is true try keeping the language of the current buffer.
int find_help_tags(const char *arg, int *num_matches, char ***matches, bool keep_lang)
{
- // Specific tags that either have a specific replacement or won't go
- // through the generic rules.
- static char *(except_tbl[][2]) = {
- { "*", "star" },
- { "g*", "gstar" },
- { "[*", "[star" },
- { "]*", "]star" },
- { ":*", ":star" },
- { "/*", "/star" }, // NOLINT
- { "/\\*", "/\\\\star" },
- { "\"*", "quotestar" },
- { "**", "starstar" },
- { "cpo-*", "cpo-star" },
- { "/\\(\\)", "/\\\\(\\\\)" },
- { "/\\%(\\)", "/\\\\%(\\\\)" },
- { "?", "?" },
- { "??", "??" },
- { ":?", ":?" },
- { "?<CR>", "?<CR>" },
- { "g?", "g?" },
- { "g?g?", "g?g?" },
- { "g??", "g??" },
- { "-?", "-?" },
- { "q?", "q?" },
- { "v_g?", "v_g?" },
- { "/\\?", "/\\\\?" },
- { "/\\z(\\)", "/\\\\z(\\\\)" },
- { "\\=", "\\\\=" },
- { ":s\\=", ":s\\\\=" },
- { "[count]", "\\[count]" },
- { "[quotex]", "\\[quotex]" },
- { "[range]", "\\[range]" },
- { ":[range]", ":\\[range]" },
- { "[pattern]", "\\[pattern]" },
- { "\\|", "\\\\bar" },
- { "\\%$", "/\\\\%\\$" },
- { "s/\\~", "s/\\\\\\~" },
- { "s/\\U", "s/\\\\U" },
- { "s/\\L", "s/\\\\L" },
- { "s/\\1", "s/\\\\1" },
- { "s/\\2", "s/\\\\2" },
- { "s/\\3", "s/\\\\3" },
- { "s/\\9", "s/\\\\9" },
- { NULL, NULL }
- };
-
- static const char *(expr_table[]) = {
- "!=?", "!~?", "<=?", "<?", "==?", "=~?",
- ">=?", ">?", "is?", "isnot?"
- };
- char *d = IObuff; // assume IObuff is long enough!
- d[0] = NUL;
-
- if (STRNICMP(arg, "expr-", 5) == 0) {
- // When the string starting with "expr-" and containing '?' and matches
- // the table, it is taken literally (but ~ is escaped). Otherwise '?'
- // is recognized as a wildcard.
- for (int i = (int)ARRAY_SIZE(expr_table); --i >= 0;) {
- if (strcmp(arg + 5, expr_table[i]) == 0) {
- for (int si = 0, di = 0;; si++) {
- if (arg[si] == '~') {
- d[di++] = '\\';
- }
- d[di++] = arg[si];
- if (arg[si] == NUL) {
- break;
- }
- }
- break;
- }
- }
- } else {
- // Recognize a few exceptions to the rule. Some strings that contain
- // '*'are changed to "star", otherwise '*' is recognized as a wildcard.
- for (int i = 0; except_tbl[i][0] != NULL; i++) {
- if (strcmp(arg, except_tbl[i][0]) == 0) {
- STRCPY(d, except_tbl[i][1]);
- break;
- }
- }
- }
+ Error err = ERROR_INIT;
+ MAXSIZE_TEMP_ARRAY(args, 1);
- if (d[0] == NUL) { // no match in table
- // Replace "\S" with "/\\S", etc. Otherwise every tag is matched.
- // Also replace "\%^" and "\%(", they match every tag too.
- // Also "\zs", "\z1", etc.
- // Also "\@<", "\@=", "\@<=", etc.
- // And also "\_$" and "\_^".
- if (arg[0] == '\\'
- && ((arg[1] != NUL && arg[2] == NUL)
- || (vim_strchr("%_z@", (uint8_t)arg[1]) != NULL
- && arg[2] != NUL))) {
- vim_snprintf(d, IOSIZE, "/\\\\%s", arg + 1);
- // Check for "/\\_$", should be "/\\_\$"
- if (d[3] == '_' && d[4] == '$') {
- STRCPY(d + 4, "\\$");
- }
- } else {
- // Replace:
- // "[:...:]" with "\[:...:]"
- // "[++...]" with "\[++...]"
- // "\{" with "\\{" -- matching "} \}"
- if ((arg[0] == '[' && (arg[1] == ':'
- || (arg[1] == '+' && arg[2] == '+')))
- || (arg[0] == '\\' && arg[1] == '{')) {
- *d++ = '\\';
- }
+ ADD_C(args, CSTR_AS_OBJ(arg));
- // If tag starts with "('", skip the "(". Fixes CTRL-] on ('option'.
- if (*arg == '(' && arg[1] == '\'') {
- arg++;
- }
- for (const char *s = arg; *s; s++) {
- // Replace "|" with "bar" and '"' with "quote" to match the name of
- // the tags for these commands.
- // Replace "*" with ".*" and "?" with "." to match command line
- // completion.
- // Insert a backslash before '~', '$' and '.' to avoid their
- // special meaning.
- if (d - IObuff > IOSIZE - 10) { // getting too long!?
- break;
- }
- switch (*s) {
- case '|':
- STRCPY(d, "bar");
- d += 3;
- continue;
- case '"':
- STRCPY(d, "quote");
- d += 5;
- continue;
- case '*':
- *d++ = '.';
- break;
- case '?':
- *d++ = '.';
- continue;
- case '$':
- case '.':
- case '~':
- *d++ = '\\';
- break;
- }
+ Object res = NLUA_EXEC_STATIC("return require'vim._core.help'.escape_subject(...)",
+ args, kRetObject, NULL, &err);
- // Replace "^x" by "CTRL-X". Don't do this for "^_" to make
- // ":help i_^_CTRL-D" work.
- // Insert '-' before and after "CTRL-X" when applicable.
- if ((uint8_t)(*s) < ' '
- || (*s == '^' && s[1]
- && (ASCII_ISALPHA(s[1]) || vim_strchr("?@[\\]^", (uint8_t)s[1]) != NULL))) {
- if (d > IObuff && d[-1] != '_' && d[-1] != '\\') {
- *d++ = '_'; // prepend a '_' to make x_CTRL-x
- }
- STRCPY(d, "CTRL-");
- d += 5;
- if (*s < ' ') {
- *d++ = (char)(*s + '@');
- if (d[-1] == '\\') {
- *d++ = '\\'; // double a backslash
- }
- } else {
- *d++ = *++s;
- }
- if (s[1] != NUL && s[1] != '_') {
- *d++ = '_'; // append a '_'
- }
- continue;
- } else if (*s == '^') { // "^" or "CTRL-^" or "^_"
- *d++ = '\\';
- } else if (s[0] == '\\' && s[1] != '\\' && *arg == '/' && s == arg + 1) {
- // Insert a backslash before a backslash after a slash, for search
- // pattern tags: "/\|" --> "/\\|".
- *d++ = '\\';
- }
-
- // "CTRL-\_" -> "CTRL-\\_" to avoid the special meaning of "\_" in
- // "CTRL-\_CTRL-N"
- if (STRNICMP(s, "CTRL-\\_", 7) == 0) {
- STRCPY(d, "CTRL-\\\\");
- d += 7;
- s += 6;
- }
-
- *d++ = *s;
-
- // If tag contains "({" or "([", tag terminates at the "(".
- // This is for help on functions, e.g.: abs({expr}).
- if (*s == '(' && (s[1] == '{' || s[1] == '[')) {
- break;
- }
-
- // If tag starts with ', toss everything after a second '. Fixes
- // CTRL-] on 'option'. (would include the trailing '.').
- if (*s == '\'' && s > arg && *arg == '\'') {
- break;
- }
- // Also '{' and '}'. Fixes CTRL-] on '{address}'.
- if (*s == '}' && s > arg && *arg == '{') {
- break;
- }
- }
- *d = NUL;
-
- if (*IObuff == '`') {
- if (d > IObuff + 2 && d[-1] == '`') {
- // remove the backticks from `command`
- memmove(IObuff, IObuff + 1, strlen(IObuff));
- d[-2] = NUL;
- } else if (d > IObuff + 3 && d[-2] == '`' && d[-1] == ',') {
- // remove the backticks and comma from `command`,
- memmove(IObuff, IObuff + 1, strlen(IObuff));
- d[-3] = NUL;
- } else if (d > IObuff + 4 && d[-3] == '`'
- && d[-2] == '\\' && d[-1] == '.') {
- // remove the backticks and dot from `command`\.
- memmove(IObuff, IObuff + 1, strlen(IObuff));
- d[-4] = NUL;
- }
- }
- }
+ if (ERROR_SET(&err)) {
+ emsg_multiline(err.msg, "lua_error", HLF_E, true);
+ api_clear_error(&err);
+ return FAIL;
}
+ api_clear_error(&err);
+
+ assert(res.type == kObjectTypeString);
+ xstrlcpy(IObuff, res.data.string.data, sizeof(IObuff));
+ api_free_object(res);
*matches = NULL;
*num_matches = 0;
diff --git a/test/functional/core/main_spec.lua b/test/functional/core/main_spec.lua
@@ -222,6 +222,7 @@ describe('vim._core', function()
'vim._core.editor',
'vim._core.ex_cmd',
'vim._core.exrc',
+ 'vim._core.help',
'vim._core.options',
'vim._core.server',
'vim._core.shared',
diff --git a/test/functional/ex_cmds/help_spec.lua b/test/functional/ex_cmds/help_spec.lua
@@ -13,6 +13,114 @@ local write_file = t.write_file
describe(':help', function()
before_each(clear)
+  it('{subject}', function()
+    n.command('helptags ++t $VIMRUNTIME/doc')
+
+    ---Runs `cmd`, then checks that the current line contains `tag` (plain-text search) and
+    ---closes the help window again.
+    ---@param cmd string Ex command to execute
+    ---@param tag string Literal text expected on the current line after the command
+    local function check_tag(cmd, tag)
+      local cmd_ok = t.pcall(n.command, cmd)
+      local current_line = n.api.nvim_get_current_line()
+      local found = current_line:find(tag, 1, true)
+
+      local reason = '?'
+      if not cmd_ok then
+        reason = 'command failed'
+      elseif not found then
+        reason = 'tag not found'
+      end
+
+      local message = string.format('Expected `:%s` to jump to tag `%s`, but %s', cmd, tag, reason)
+      assert(cmd_ok and found, message)
+      n.command('helpclose')
+    end
+
+    -- Each entry is { command, text expected on the line the command jumps to }.
+    -- Entries are checked in the order listed.
+    local cases = {
+      { 'help', '*help.txt*' },
+      { 'help |', '*bar*' },
+      { 'help "*', '*quotestar*' },
+      { 'help ch??khealth', '*:checkhealth*' },
+
+      { [[help \\star]], [[*/\star*]] },
+      { 'help /*', [[*/\star*]] },
+      { 'help ?', '*?*' },
+      { 'help ??', '*??*' },
+      { 'help expr-!=?', '*expr-!=?*' },
+
+      { 'help /<cr>', '*/<CR>*' },
+      { [[help %(\\)]], [[*/\%(\)*]] },
+      { 'help %^', [[/\%^]] },
+      { 'help /_^G', '/_CTRL-G' },
+      { [[help \0]], [[\0]] },
+
+      { 'help !', '*!*' },
+      { 'help #{}', '*#{}*' },
+      { 'help %:8', '*%:8*' },
+      { 'help &', '*&*' },
+      { [[help '']], [[*''*]] },
+      { [[help '(]], [[*'(*]] },
+      { [[help '0]], [[*'0*]] },
+      { [[help 'ac']], [[*'ac'*]] },
+      { [[help '{]], [[*'{*]] },
+      { 'help )', '*)*' },
+      { 'help +', '*+*' },
+
+      { 'help +opt', '*++opt*' },
+      { 'help --', '*--*' },
+      { 'help -?', '*-?*' },
+      { 'help .', '*.*' },
+      { 'help :', '*:*' },
+      { [[help :'}]], [[*:'}*]] },
+      { 'help :,', '*:,*' },
+      { 'help :<abuf>', '*:<abuf>*' },
+      { [[help :\|]], [[*:\bar*]] },
+      { [[help :\\|]], [[*:\bar*]] },
+      { 'help _', '*_*' },
+      { 'help `', '*`*' },
+      { 'help `(', '*`(*' },
+      { [[help `:ls`.]], [[*:ls*]] },
+
+      { 'help [', '*[*' },
+      { 'help [#', '*[#*' },
+      { [[help [']], [[*['*]] },
+      { 'help [(', '*[(*' },
+      { 'help [++opt]', '*[++opt]*' },
+      { 'help [:tab:]', '*[:tab:]*' },
+      { 'help [count]', '*[count]*' },
+      { 'help :[range]', '*:[range]*' },
+      { 'help [<space>', '[<Space>' },
+      { 'help ]_^D', ']_CTRL-D' },
+
+      { [[help $HOME]], [[*$HOME*]] },
+
+      { 'help <C-pagedown>', '*CTRL-<PageDown>*' },
+      { 'help ^A', '*CTRL-A*' },
+      { 'help ^W_+', '*CTRL-W_+*' },
+      { 'help ^W<up>', '*CTRL-W_<Up>*' },
+      { 'help ^W>', '*CTRL-W_>*' },
+      { 'help ^W^]', '*CTRL-W_CTRL-]*' },
+      { 'help ^W^', '*CTRL-W_^*' },
+      { 'help ^W|', '*CTRL-W_bar*' },
+      { 'help ^Wg<tab>', '*CTRL-W_g<Tab>*' },
+      { 'help ^]', '*CTRL-]*' },
+      { 'help ^{char}', 'CTRL-{char}' },
+      { 'help [^L', '[_CTRL-L' },
+      { 'help <C-', '*<C-*' },
+      { 'help <S-CR>', '*<S-CR>*' },
+      { 'help <<', '*<<*' },
+      { 'help <>', '*<>*' },
+      { [[help i^x^y]], '*i_CTRL-X_CTRL-Y*' },
+      { [[help CTRL-\_CTRL-N]], [[*CTRL-\_CTRL-N*]] },
+
+      { [[exe "help i\<C-\>\<C-G>"]], [[*i_CTRL-\_CTRL-G*]] },
+      { [[exe "help \<C-V>"]], '*CTRL-V*' },
+      { [[exe "help! arglistid([{winnr})"]], '*arglistid()*' },
+      { [[exe "help! 'autoindent'."]], [[*'autoindent'*]] },
+
+      { 'exusage', '*:index*' },
+      { 'viusage', '*normal-index*' },
+
+      -- Test cases for removed exceptions
+      { 'help /\\(\\)', '*/\\(\\)*' },
+      { 'help :s\\=', '*:s\\=*' },
+      { [[help expr-']], [[*expr-'*]] },
+      { 'help expr-barbar', '*expr-barbar*' },
+      { [[help s/\9]], [[*s/\9*]] },
+      { [[help s/\U]], [[*s/\U*]] },
+      { [[help s/\~]], [[*s/\~*]] },
+      { [[help \|]], [[*/\bar*]] },
+    }
+
+    for _, case in ipairs(cases) do
+      check_tag(case[1], case[2])
+    end
+  end)
+
it('window closed makes cursor return to a valid win/buf #9773', function()
n.add_builddir_to_rtp()
command('help help')