neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

commit 9843573a61f42b7f9ea5ded0f5221320ac1925c5
parent 6f904cfef15b06473710977d3656e67ef3a930d0
Author: zeertzjq <zeertzjq@outlook.com>
Date:   Thu, 14 Aug 2025 07:18:05 +0800

Merge pull request #35320 from zeertzjq/vim-9.1.1627

vim-patch:9.1.{1627,1628}
Diffstat:
Mruntime/doc/news.txt | 2++
Mruntime/doc/pattern.txt | 73+++++++++++++++++++++++++++++++++++++++++++++----------------------------
Mruntime/doc/vimfn.txt | 3---
Mruntime/lua/vim/_meta/vimfn.lua | 3---
Msrc/gen/gen_eval.lua | 4++++
Msrc/nvim/buffer.c | 6+++---
Msrc/nvim/cmdexpand.c | 5+++--
Msrc/nvim/eval.lua | 3---
Asrc/nvim/fuzzy.c | 922+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/nvim/fuzzy.h | 25+++++++++++++++++++++++++
Msrc/nvim/insexpand.c | 38+++++++++++++++++++++-----------------
Msrc/nvim/mapping.c | 6+++---
Msrc/nvim/option.c | 4++--
Msrc/nvim/popupmenu.c | 7+------
Msrc/nvim/quickfix.c | 7++++---
Msrc/nvim/search.c | 921-------------------------------------------------------------------------------
Msrc/nvim/search.h | 13-------------
Mtest/old/testdir/test_cmdline.vim | 13+++++--------
Mtest/old/testdir/test_matchfuzzy.vim | 131+++++++++++++++++++++++++++++++++++++++----------------------------------------
19 files changed, 1104 insertions(+), 1082 deletions(-)

diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt @@ -327,6 +327,8 @@ These existing features changed their behavior. • |$VIM| and |$VIMRUNTIME| no longer check for Vim version-specific runtime directory `vim{number}` (e.g. `vim82`). • 'scrollback' maximum value increased from 100000 to 1000000 +• |matchfuzzy()| and |matchfuzzypos()| use an improved fuzzy matching algorithm + (same as fzy). ============================================================================== REMOVED FEATURES *news-removed* diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt @@ -1481,34 +1481,51 @@ Finally, these constructs are unique to Perl: ============================================================================== 11. Fuzzy matching *fuzzy-matching* -Fuzzy matching refers to matching strings using a non-exact search string. -Fuzzy matching will match a string, if all the characters in the search string -are present anywhere in the string in the same order. Case is ignored. In a -matched string, other characters can be present between two consecutive -characters in the search string. If the search string has multiple words, then -each word is matched separately. So the words in the search string can be -present in any order in a string. - -Fuzzy matching assigns a score for each matched string based on the following -criteria: - - The number of sequentially matching characters. - - The number of characters (distance) between two consecutive matching - characters. - - Matches at the beginning of a word - - Matches at a camel case character (e.g. Case in CamelCase) - - Matches after a path separator or a hyphen. - - The number of unmatched characters in a string. - - A full/exact match is preferred. -The matching string with the highest score is returned first. - -For example, when you search for the "get pat" string using fuzzy matching, it -will match the strings "GetPattern", "PatternGet", "getPattern", "patGetter", -"getSomePattern", "MatchpatternGet" etc. - -The functions |matchfuzzy()| and |matchfuzzypos()| can be used to fuzzy search -a string in a List of strings. The matchfuzzy() function returns a List of -matching strings. The matchfuzzypos() functions returns the List of matches, -the matching positions and the fuzzy match scores. +Fuzzy matching scores how well a string matches a pattern when the pattern +characters appear in order but not necessarily contiguously. + +Example: > + Pattern: "vim" + Candidates: "vim" -> perfect + "vimeo" -> good (v i m) + "voice mail" -> weaker (v _ i _ _ _ m) + "vintage" -> no match (no "m") +< +If the search string has multiple words, each word is matched separately and +may appear in any order in the candidate. For example "get pat" matches +"GetPattern", "PatternGet", "getPattern", "patGetter", "getSomePattern", +"MatchpatternGet", etc. + +The 'ignorecase' and 'smartcase' options do not apply, case is ignored if the +pattern is all lower case. + +Vim's implementation is based on the algorithm from the fzy project: +https://github.com/jhawthorn/fzy + +It uses dynamic programming to compute an optimal score for a given pattern +and candidate. + +The algorithm works in two stages: + +1. Forward pass + Scan the candidate left to right, tracking the best score for each + pattern position. Matches score higher when they occur at the start + of the candidate, the start of a word (space, underscore, dash, + camelCase), or directly after the previous match. + +2. Backward pass + Start from the best-scoring end position and step back to find match + positions, ensuring the alignment is optimal. + +Vim extends the original algorithm to support multibyte codepoints, allowing +correct matching for UTF-8 and other encodings. + +Time complexity is O(pattern * candidate). Memory usage is proportional +to the same. + +The |matchfuzzy()| and |matchfuzzypos()| functions perform fuzzy searching in +a List of strings. |matchfuzzy()| returns the matching strings, while +|matchfuzzypos()| returns the matches along with their positions and scores. The "f" flag of `:vimgrep` enables fuzzy matching. diff --git a/runtime/doc/vimfn.txt b/runtime/doc/vimfn.txt @@ -6520,9 +6520,6 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* given sequence. limit Maximum number of matches in {list} to be returned. Zero means no limit. - camelcase Use enhanced camel case scoring making results - better suited for completion related to - programming languages. Defaults to v:true. If {list} is a list of dictionaries, then the optional {dict} argument supports the following additional items: diff --git a/runtime/lua/vim/_meta/vimfn.lua b/runtime/lua/vim/_meta/vimfn.lua @@ -5918,9 +5918,6 @@ function vim.fn.matchend(expr, pat, start, count) end --- given sequence. --- limit Maximum number of matches in {list} to be --- returned. Zero means no limit. ---- camelcase Use enhanced camel case scoring making results ---- better suited for completion related to ---- programming languages. Defaults to v:true. --- --- If {list} is a list of dictionaries, then the optional {dict} --- argument supports the following additional items: diff --git a/src/gen/gen_eval.lua b/src/gen/gen_eval.lua @@ -13,6 +13,7 @@ local hashpipe = assert(io.open(funcsfname, 'wb')) hashpipe:write([[ #include "nvim/arglist.h" +#include "nvim/channel.h" #include "nvim/cmdexpand.h" #include "nvim/cmdhist.h" #include "nvim/diff.h" @@ -28,7 +29,10 @@ hashpipe:write([[ #include "nvim/ex_docmd.h" #include "nvim/ex_getln.h" #include "nvim/fold.h" +#include "nvim/fuzzy.h" #include "nvim/getchar.h" +#include "nvim/indent.h" +#include "nvim/indent_c.h" #include "nvim/insexpand.h" #include "nvim/mapping.h" #include "nvim/match.h" diff --git a/src/nvim/buffer.c b/src/nvim/buffer.c @@ -58,6 +58,7 @@ #include "nvim/file_search.h" #include "nvim/fileio.h" #include "nvim/fold.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/getchar.h" @@ -100,7 +101,6 @@ #include "nvim/regexp_defs.h" #include "nvim/runtime.h" #include "nvim/runtime_defs.h" -#include "nvim/search.h" #include "nvim/spell.h" #include "nvim/state_defs.h" #include "nvim/statusline.h" @@ -2477,12 +2477,12 @@ int ExpandBufnames(char *pat, int *num_file, char ***file, int options) } else { p = NULL; // first try matching with the short file name - if ((score = fuzzy_match_str(buf->b_sfname, pat)) != 0) { + if ((score = fuzzy_match_str(buf->b_sfname, pat)) != FUZZY_SCORE_NONE) { p = buf->b_sfname; } if (p == NULL) { // next try matching with the full path file name - if ((score = fuzzy_match_str(buf->b_ffname, pat)) != 0) { + if ((score = fuzzy_match_str(buf->b_ffname, pat)) != FUZZY_SCORE_NONE) { p = buf->b_ffname; } } diff --git a/src/nvim/cmdexpand.c b/src/nvim/cmdexpand.c @@ -29,6 +29,7 @@ #include "nvim/ex_cmds_defs.h" #include "nvim/ex_docmd.h" #include "nvim/ex_getln.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/getchar.h" @@ -3096,7 +3097,7 @@ void ExpandGeneric(const char *const pat, expand_T *xp, regmatch_T *regmatch, ch match = vim_regexec(regmatch, str, 0); } else { score = fuzzy_match_str(str, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else { match = true; @@ -3408,7 +3409,7 @@ static int ExpandUserDefined(const char *const pat, expand_T *xp, regmatch_T *re match = vim_regexec(regmatch, s, 0); } else { score = fuzzy_match_str(s, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else { match = true; // match everything diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua @@ -7272,9 +7272,6 @@ M.funcs = { given sequence. limit Maximum number of matches in {list} to be returned. Zero means no limit. - camelcase Use enhanced camel case scoring making results - better suited for completion related to - programming languages. Defaults to v:true. If {list} is a list of dictionaries, then the optional {dict} argument supports the following additional items: diff --git a/src/nvim/fuzzy.c b/src/nvim/fuzzy.c @@ -0,0 +1,922 @@ +// fuzzy.c: fuzzy matching algorithm and related functions +// +// Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy) +// Original code: +// Copyright (c) 2014 John Hawthorn +// Licensed under the MIT License. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "nvim/ascii_defs.h" +#include "nvim/charset.h" +#include "nvim/errors.h" +#include "nvim/eval.h" +#include "nvim/eval/typval.h" +#include "nvim/fuzzy.h" +#include "nvim/garray.h" +#include "nvim/garray_defs.h" +#include "nvim/globals.h" +#include "nvim/insexpand.h" +#include "nvim/macros_defs.h" +#include "nvim/mbyte.h" +#include "nvim/memline.h" +#include "nvim/memory.h" +#include "nvim/message.h" + +typedef double score_t; + +#define SCORE_MAX INFINITY +#define SCORE_MIN (-INFINITY) +#define SCORE_SCALE 1000 + +typedef struct { + int idx; ///< used for stable sort + listitem_T *item; + int score; + list_T *lmatchpos; + char *pat; + char *itemstr; + bool itemstr_allocated; + int startpos; +} fuzzyItem_T; + +typedef struct match_struct match_struct; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "fuzzy.c.generated.h" +#endif + +/// fuzzy_match() +/// +/// @return true if "pat_arg" matches "str". Also returns the match score in +/// "outScore" and the matching character positions in "matches". +bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq, + int *const outScore, uint32_t *const matches, const int maxMatches) + FUNC_ATTR_NONNULL_ALL +{ + bool complete = false; + int numMatches = 0; + + *outScore = 0; + + char *const save_pat = xstrdup(pat_arg); + char *pat = save_pat; + char *p = pat; + + // Try matching each word in "pat_arg" in "str" + while (true) { + if (matchseq) { + complete = true; + } else { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) { + break; + } + pat = p; + while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) { + MB_PTR_ADV(p); + } + if (*p == NUL) { // processed all the words + complete = true; + } + *p = NUL; + } + + int score = FUZZY_SCORE_NONE; + if (has_match(pat, str)) { + score_t fzy_score = match_positions(pat, str, matches + numMatches); + score = (fzy_score == (score_t)SCORE_MIN + ? INT_MIN + 1 + : (fzy_score == (score_t)SCORE_MAX + ? INT_MAX + : (fzy_score < 0 + ? (int)ceil(fzy_score * SCORE_SCALE - 0.5) + : (int)floor(fzy_score * SCORE_SCALE + 0.5)))); + } + + if (score == FUZZY_SCORE_NONE) { + numMatches = 0; + *outScore = FUZZY_SCORE_NONE; + break; + } + + if (score > 0 && *outScore > INT_MAX - score) { + *outScore = INT_MAX; + } else if (score < 0 && *outScore < INT_MIN + 1 - score) { + *outScore = INT_MIN + 1; + } else { + *outScore += score; + } + + numMatches += mb_charlen(pat); + + if (complete || numMatches >= maxMatches) { + break; + } + + // try matching the next word + p++; + } + + xfree(save_pat); + return numMatches != 0; +} + +/// Sort the fuzzy matches in the descending order of the match score. +/// For items with same score, retain the order using the index (stable sort) +static int fuzzy_match_item_compare(const void *const s1, const void *const s2) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE +{ + const int v1 = ((const fuzzyItem_T *)s1)->score; + const int v2 = ((const fuzzyItem_T *)s2)->score; + + if (v1 == v2) { + const char *const pat = ((const fuzzyItem_T *)s1)->pat; + const size_t patlen = strlen(pat); + int startpos = ((const fuzzyItem_T *)s1)->startpos; + const bool exact_match1 = startpos >= 0 + && strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0; + startpos = ((const fuzzyItem_T *)s2)->startpos; + const bool exact_match2 = startpos >= 0 + && strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0; + + if (exact_match1 == exact_match2) { + const int idx1 = ((const fuzzyItem_T *)s1)->idx; + const int idx2 = ((const fuzzyItem_T *)s2)->idx; + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + } else if (exact_match2) { + return 1; + } + return -1; + } else { + return v1 > v2 ? -1 : 1; + } +} + +/// Fuzzy search the string "str" in a list of "items" and return the matching +/// strings in "fmatchlist". +/// If "matchseq" is true, then for multi-word search strings, match all the +/// words in sequence. +/// If "items" is a list of strings, then search for "str" in the list. +/// If "items" is a list of dicts, then either use "key" to lookup the string +/// for each item or use "item_cb" Funcref function to get the string. +/// If "retmatchpos" is true, then return a list of positions where "str" +/// matches for each item. +static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq, + const char *const key, Callback *const item_cb, + const bool retmatchpos, list_T *const fmatchlist, + const int max_matches) + FUNC_ATTR_NONNULL_ARG(2, 5, 7) +{ + int len = tv_list_len(l); + if (len == 0) { + return; + } + if (max_matches > 0 && len > max_matches) { + len = max_matches; + } + + fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T)); + int match_count = 0; + uint32_t matches[FUZZY_MATCH_MAX_LEN]; + + // For all the string items in items, get the fuzzy matching score + TV_LIST_ITER(l, li, { + if (max_matches > 0 && match_count >= max_matches) { + break; + } + + char *itemstr = NULL; + bool itemstr_allocate = false; + typval_T rettv; + rettv.v_type = VAR_UNKNOWN; + const typval_T *const tv = TV_LIST_ITEM_TV(li); + if (tv->v_type == VAR_STRING) { // list of strings + itemstr = tv->vval.v_string; + } else if (tv->v_type == VAR_DICT + && (key != NULL || item_cb->type != kCallbackNone)) { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) { + itemstr = tv_dict_get_string(tv->vval.v_dict, key, false); + } else { + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + tv->vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = tv->vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (callback_call(item_cb, 1, argv, &rettv)) { + if (rettv.v_type == VAR_STRING) { + itemstr = rettv.vval.v_string; + itemstr_allocate = true; + } + } + tv_dict_unref(tv->vval.v_dict); + } + } + + int score; + if (itemstr != NULL + && fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) { + items[match_count].idx = (int)match_count; + items[match_count].item = li; + items[match_count].score = score; + items[match_count].pat = str; + items[match_count].startpos = (int)matches[0]; + items[match_count].itemstr = itemstr_allocate ? xstrdup(itemstr) : itemstr; + items[match_count].itemstr_allocated = itemstr_allocate; + + // Copy the list of matching positions in itemstr to a list, if + // "retmatchpos" is set. + if (retmatchpos) { + items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow); + int j = 0; + const char *p = str; + while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) { + if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) { + tv_list_append_number(items[match_count].lmatchpos, matches[j]); + j++; + } + MB_PTR_ADV(p); + } + } + match_count++; + } + tv_clear(&rettv); + }); + + if (match_count > 0) { + // Sort the list by the descending order of the match score + qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare); + + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with three items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. The third item is a list of matching scores. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + list_T *retlist; + if (retmatchpos) { + const listitem_T *const li = tv_list_find(fmatchlist, 0); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + retlist = TV_LIST_ITEM_TV(li)->vval.v_list; + } else { + retlist = fmatchlist; + } + + // Copy the matching strings to the return list + for (int i = 0; i < match_count; i++) { + tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item)); + } + + // next copy the list of matching positions + if (retmatchpos) { + const listitem_T *li = tv_list_find(fmatchlist, -2); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + retlist = TV_LIST_ITEM_TV(li)->vval.v_list; + + for (int i = 0; i < match_count; i++) { + assert(items[i].lmatchpos != NULL); + tv_list_append_list(retlist, items[i].lmatchpos); + items[i].lmatchpos = NULL; + } + + // copy the matching scores + li = tv_list_find(fmatchlist, -1); + assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); + retlist = TV_LIST_ITEM_TV(li)->vval.v_list; + for (int i = 0; i < match_count; i++) { + tv_list_append_number(retlist, items[i].score); + } + } + } + + for (int i = 0; i < match_count; i++) { + if (items[i].itemstr_allocated) { + xfree(items[i].itemstr); + } + assert(items[i].lmatchpos == NULL); + } + xfree(items); +} + +/// Do fuzzy matching. Returns the list of matched strings in "rettv". +/// If "retmatchpos" is true, also returns the matching character positions. +static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, + const bool retmatchpos) + FUNC_ATTR_NONNULL_ALL +{ + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) { + semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); + return; + } + if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { + semsg(_(e_invarg2), tv_get_string(&argvars[1])); + return; + } + + Callback cb = CALLBACK_NONE; + const char *key = NULL; + bool matchseq = false; + int max_matches = 0; + if (argvars[2].v_type != VAR_UNKNOWN) { + if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) { + return; + } + + // To search a dict, either a callback function or a key can be + // specified. + dict_T *const d = argvars[2].vval.v_dict; + const dictitem_T *di; + if ((di = tv_dict_find(d, "key", -1)) != NULL) { + if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) { + semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv)); + return; + } + key = tv_get_string(&di->di_tv); + } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) { + semsg(_(e_invargval), "text_cb"); + return; + } + + if ((di = tv_dict_find(d, "limit", -1)) != NULL) { + if (di->di_tv.v_type != VAR_NUMBER) { + semsg(_(e_invargval), "limit"); + return; + } + max_matches = (int)tv_get_number_chk(&di->di_tv, NULL); + } + + if (tv_dict_has_key(d, "matchseq")) { + matchseq = true; + } + } + + // get the fuzzy matches + tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown); + if (retmatchpos) { + // For matchfuzzypos(), a list with three items are returned. First + // item is a list of matching strings, the second item is a list of + // lists with matching positions within each string and the third item + // is the list of scores of the matches. + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); + } + + fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]), + matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches); + + callback_free(&cb); +} + +/// "matchfuzzy()" function +void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + do_fuzzymatch(argvars, rettv, false); +} + +/// "matchfuzzypos()" function +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) +{ + do_fuzzymatch(argvars, rettv, true); +} + +/// Same as fuzzy_match_item_compare() except for use with a string match +static int fuzzy_match_str_compare(const void *const s1, const void *const s2) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE +{ + const int v1 = ((fuzmatch_str_T *)s1)->score; + const int v2 = ((fuzmatch_str_T *)s2)->score; + const int idx1 = ((fuzmatch_str_T *)s1)->idx; + const int idx2 = ((fuzmatch_str_T *)s2)->idx; + + if (v1 == v2) { + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + } else { + return v1 > v2 ? -1 : 1; + } +} + +/// Sort fuzzy matches by score +static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz) + FUNC_ATTR_NONNULL_ALL +{ + // Sort the list by the descending order of the match score + qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare); +} + +/// Same as fuzzy_match_item_compare() except for use with a function name +/// string match. <SNR> functions should be sorted to the end. +static int fuzzy_match_func_compare(const void *const s1, const void *const s2) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE +{ + const int v1 = ((fuzmatch_str_T *)s1)->score; + const int v2 = ((fuzmatch_str_T *)s2)->score; + const int idx1 = ((fuzmatch_str_T *)s1)->idx; + const int idx2 = ((fuzmatch_str_T *)s2)->idx; + const char *const str1 = ((fuzmatch_str_T *)s1)->str; + const char *const str2 = ((fuzmatch_str_T *)s2)->str; + + if (*str1 != '<' && *str2 == '<') { + return -1; + } + if (*str1 == '<' && *str2 != '<') { + return 1; + } + if (v1 == v2) { + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + } + return v1 > v2 ? -1 : 1; +} + +/// Sort fuzzy matches of function names by score. +/// <SNR> functions should be sorted to the end. +static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz) + FUNC_ATTR_NONNULL_ALL +{ + // Sort the list by the descending order of the match score + qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare); +} + +/// Fuzzy match "pat" in "str". +/// @returns 0 if there is no match. Otherwise, returns the match score. +int fuzzy_match_str(char *const str, const char *const pat) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + if (str == NULL || pat == NULL) { + return 0; + } + + int score = FUZZY_SCORE_NONE; + uint32_t matchpos[FUZZY_MATCH_MAX_LEN]; + fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos)); + + return score; +} + +/// Fuzzy match the position of string "pat" in string "str". +/// @returns a dynamic array of matching positions. If there is no match, returns NULL. +garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat) +{ + if (str == NULL || pat == NULL) { + return NULL; + } + + garray_T *match_positions = xmalloc(sizeof(garray_T)); + ga_init(match_positions, sizeof(uint32_t), 10); + + int score = FUZZY_SCORE_NONE; + uint32_t matches[FUZZY_MATCH_MAX_LEN]; + if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN) + || score == FUZZY_SCORE_NONE) { + ga_clear(match_positions); + xfree(match_positions); + return NULL; + } + + int j = 0; + for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) { + if (!ascii_iswhite(utf_ptr2char(p))) { + GA_APPEND(uint32_t, match_positions, matches[j]); + j++; + } + } + + return match_positions; +} + +/// This function splits the line pointed to by `*ptr` into words and performs +/// a fuzzy match for the pattern `pat` on each word. It iterates through the +/// line, moving `*ptr` to the start of each word during the process. +/// +/// If a match is found: +/// - `*ptr` points to the start of the matched word. +/// - `*len` is set to the length of the matched word. +/// - `*score` contains the match score. +/// +/// If no match is found, `*ptr` is updated to the end of the line. +bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score) +{ + char *str = *ptr; + char *strBegin = str; + char *end = NULL; + char *start = NULL; + bool found = false; + + if (str == NULL || pat == NULL) { + return found; + } + char *line_end = find_line_end(str); + + while (str < line_end) { + // Skip non-word characters + start = find_word_start(str); + if (*start == NUL) { + break; + } + end = find_word_end(start); + + // Extract the word from start to end + char save_end = *end; + *end = NUL; + + // Perform fuzzy match + *score = fuzzy_match_str(start, pat); + *end = save_end; + + if (*score != FUZZY_SCORE_NONE) { + *len = (int)(end - start); + found = true; + *ptr = start; + if (current_pos) { + current_pos->col += (int)(end - strBegin); + } + break; + } + + // Move to the end of the current word for the next iteration + str = end; + // Ensure we continue searching after the current word + while (*str != NUL && !vim_iswordp(str)) { + MB_PTR_ADV(str); + } + } + + if (!found) { + *ptr = line_end; + } + + return found; +} + +/// Search for the next fuzzy match in the specified buffer. +/// This function attempts to find the next occurrence of the given pattern +/// in the buffer, starting from the current position. It handles line wrapping +/// and direction of search. +/// +/// Return true if a match is found, otherwise false. +bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos, + int *len, char **ptr, int *score) +{ + pos_T current_pos = *pos; + pos_T circly_end; + bool found_new_match = false; + bool looped_around = false; + + bool whole_line = ctrl_x_mode_whole_line(); + + if (buf == curbuf) { + circly_end = *start_pos; + } else { + circly_end.lnum = buf->b_ml.ml_line_count; + circly_end.col = 0; + circly_end.coladd = 0; + } + + if (whole_line && start_pos->lnum != pos->lnum) { + current_pos.lnum += dir; + } + + while (true) { + // Check if looped around and back to start position + if (looped_around && equalpos(current_pos, circly_end)) { + break; + } + + // Ensure current_pos is valid + if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) { + // Get the current line buffer + *ptr = ml_get_buf(buf, current_pos.lnum); + if (!whole_line) { + *ptr += current_pos.col; + } + + // If ptr is end of line is reached, move to next line + // or previous line based on direction + if (*ptr != NULL && **ptr != NUL) { + if (!whole_line) { + // Try to find a fuzzy match in the current line starting + // from current position + found_new_match = fuzzy_match_str_in_line(ptr, pattern, + len, &current_pos, score); + if (found_new_match) { + *pos = current_pos; + break; + } else if (looped_around && current_pos.lnum == circly_end.lnum) { + break; + } + } else { + if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) { + found_new_match = true; + *pos = current_pos; + *len = ml_get_buf_len(buf, current_pos.lnum); + break; + } + } + } + } + + // Move to the next line or previous line based on direction + if (dir == FORWARD) { + if (++current_pos.lnum > buf->b_ml.ml_line_count) { + if (p_ws) { + current_pos.lnum = 1; + looped_around = true; + } else { + break; + } + } + } else { + if (--current_pos.lnum < 1) { + if (p_ws) { + current_pos.lnum = buf->b_ml.ml_line_count; + looped_around = true; + } else { + break; + } + } + } + current_pos.col = 0; + } + + return found_new_match; +} + +/// Free an array of fuzzy string matches "fuzmatch[count]". +void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count) +{ + if (fuzmatch == NULL) { + return; + } + for (int i = 0; i < count; i++) { + xfree(fuzmatch[count].str); + } + xfree(fuzmatch); +} + +/// Copy a list of fuzzy matches into a string list after sorting the matches by +/// the fuzzy score. Frees the memory allocated for "fuzmatch". +void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches, + const int count, const bool funcsort) + FUNC_ATTR_NONNULL_ARG(2) +{ + if (count <= 0) { + return; + } + + *matches = xmalloc((size_t)count * sizeof(char *)); + + // Sort the list by the descending order of the match score + if (funcsort) { + fuzzy_match_func_sort(fuzmatch, count); + } else { + fuzzy_match_str_sort(fuzmatch, count); + } + + for (int i = 0; i < count; i++) { + (*matches)[i] = fuzmatch[i].str; + } + xfree(fuzmatch); +} + +/// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy. +/// This implementation extends the original by supporting multibyte characters. + +#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN + +#define SCORE_GAP_LEADING -0.005 +#define SCORE_GAP_TRAILING -0.005 +#define SCORE_GAP_INNER -0.01 +#define SCORE_MATCH_CONSECUTIVE 1.0 +#define SCORE_MATCH_SLASH 0.9 +#define SCORE_MATCH_WORD 0.8 +#define SCORE_MATCH_CAPITAL 0.7 +#define SCORE_MATCH_DOT 0.6 + +static int has_match(const char *needle, const char *haystack) +{ + while (*needle != NUL) { + const int n_char = utf_ptr2char(needle); + const char *p = haystack; + bool matched = false; + + while (*p != NUL) { + const int h_char = utf_ptr2char(p); + + if (n_char == h_char || mb_toupper(n_char) == h_char) { + matched = true; + break; + } + p += utfc_ptr2len(p); + } + + if (!matched) { + return 0; + } + + needle += utfc_ptr2len(needle); + haystack = p + utfc_ptr2len(p); + } + return 1; +} + +struct match_struct { + int needle_len; + int haystack_len; + int lower_needle[MATCH_MAX_LEN]; ///< stores codepoints + int lower_haystack[MATCH_MAX_LEN]; ///< stores codepoints + score_t match_bonus[MATCH_MAX_LEN]; +}; + +#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ') +#define IS_PATH_SEP(c) ((c) == '/') +#define IS_DOT(c) ((c) == '.') + +static score_t compute_bonus_codepoint(int last_c, int c) +{ + if (ASCII_ISALNUM(c) || vim_iswordc(c)) { + if (IS_PATH_SEP(last_c)) { + return SCORE_MATCH_SLASH; + } + if (IS_WORD_SEP(last_c)) { + return SCORE_MATCH_WORD; + } + if (IS_DOT(last_c)) { + return SCORE_MATCH_DOT; + } + if (mb_isupper(c) && mb_islower(last_c)) { + return SCORE_MATCH_CAPITAL; + } + } + return 0; +} + +static void setup_match_struct(match_struct *const match, const char *const needle, + const char *const haystack) +{ + int i = 0; + const char *p = needle; + while (*p != NUL && i < MATCH_MAX_LEN) { + const int c = utf_ptr2char(p); + match->lower_needle[i++] = mb_tolower(c); + MB_PTR_ADV(p); + } + match->needle_len = i; + + i = 0; + p = haystack; + int prev_c = '/'; + while (*p != NUL && i < MATCH_MAX_LEN) { + const int c = utf_ptr2char(p); + match->lower_haystack[i] = mb_tolower(c); + match->match_bonus[i] = compute_bonus_codepoint(prev_c, c); + prev_c = c; + MB_PTR_ADV(p); + i++; + } + match->haystack_len = i; +} + +static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M, + const score_t *last_D, const score_t *last_M) +{ + int n = match->needle_len; + int m = match->haystack_len; + int i = row; + + const int *lower_needle = match->lower_needle; + const int *lower_haystack = match->lower_haystack; + const score_t *match_bonus = match->match_bonus; + + score_t prev_score = (score_t)SCORE_MIN; + score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; + + // These will not be used with this value, but not all compilers see it + score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN; + + for (int j = 0; j < m; j++) { + if (lower_needle[i] == lower_haystack[j]) { + score_t score = (score_t)SCORE_MIN; + if (!i) { + score = (j * SCORE_GAP_LEADING) + match_bonus[j]; + } else if (j) { // i > 0 && j > 0 + score = MAX(prev_M + match_bonus[j], + // consecutive match, doesn't stack with match_bonus + prev_D + SCORE_MATCH_CONSECUTIVE); + } + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = score; + curr_M[j] = prev_score = MAX(score, prev_score + gap_score); + } else { + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = (score_t)SCORE_MIN; + curr_M[j] = prev_score = prev_score + gap_score; + } + } +} + +static score_t match_positions(const char *const needle, const char *const haystack, + uint32_t *const positions) +{ + if (!*needle) { + return (score_t)SCORE_MIN; + } + + match_struct match; + setup_match_struct(&match, needle, haystack); + + int n = match.needle_len; + int m = match.haystack_len; + + if (m > MATCH_MAX_LEN || n > m) { + // Unreasonably large candidate: return no score + // If it is a valid match it will still be returned, it will + // just be ranked below any reasonably sized candidates + return (score_t)SCORE_MIN; + } else if (n == m) { + // Since this method can only be called with a haystack which + // matches needle. If the lengths of the strings are equal the + // strings themselves must also be equal (ignoring case). + if (positions) { + for (int i = 0; i < n; i++) { + positions[i] = (uint32_t)i; + } + } + return (score_t)SCORE_MAX; + } + + // D[][] Stores the best score for this position ending with a match. + // M[][] Stores the best possible score at this position. + score_t(*D)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n); + score_t(*M)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n); + + match_row(&match, 0, D[0], M[0], D[0], M[0]); + for (int i = 1; i < n; i++) { + match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]); + } + + // backtrace to find the positions of optimal matching + if (positions) { + int match_required = 0; + for (int i = n - 1, j = m - 1; i >= 0; i--) { + for (; j >= 0; j--) { + // There may be multiple paths which result in + // the optimal weight. + // + // For simplicity, we will pick the first one + // we encounter, the latest in the candidate + // string. + if (D[i][j] != (score_t)SCORE_MIN + && (match_required || D[i][j] == M[i][j])) { + // If this score was determined using + // SCORE_MATCH_CONSECUTIVE, the + // previous character MUST be a match + match_required = i && j + && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; + positions[i] = (uint32_t)(j--); + break; + } + } + } + } + + score_t result = M[n - 1][m - 1]; + + xfree(M); + xfree(D); + + return result; +} diff --git a/src/nvim/fuzzy.h b/src/nvim/fuzzy.h @@ -0,0 +1,25 @@ +#pragma once + +#include <limits.h> +#include <stdint.h> // IWYU pragma: keep + +#include "nvim/buffer_defs.h" // IWYU pragma: keep +#include "nvim/eval/typval_defs.h" // IWYU pragma: keep +#include "nvim/garray_defs.h" // IWYU pragma: keep +#include "nvim/pos_defs.h" // IWYU pragma: keep +#include "nvim/types_defs.h" // IWYU pragma: keep + +enum { FUZZY_MATCH_MAX_LEN = 1024, }; ///< max characters that can be matched +enum { FUZZY_SCORE_NONE = INT_MIN, }; ///< invalid fuzzy score + +/// Fuzzy matched string list item. Used for fuzzy match completion. Items are +/// usually sorted by "score". The "idx" member is used for stable-sort. +typedef struct { + int idx; + char *str; + int score; +} fuzmatch_str_T; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "fuzzy.h.generated.h" +#endif diff --git a/src/nvim/insexpand.c b/src/nvim/insexpand.c @@ -34,6 +34,7 @@ #include "nvim/extmark.h" #include "nvim/extmark_defs.h" #include "nvim/fileio.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/getchar.h" @@ -1000,7 +1001,7 @@ static int ins_compl_add(char *const str, int len, char *const fname, char *cons // current match in the list of matches . if (compl_first_match == NULL) { match->cp_next = match->cp_prev = NULL; - } else if (cfc_has_mode() && score > 0 && compl_get_longest) { + } else if (cfc_has_mode() && score != FUZZY_SCORE_NONE && compl_get_longest) { compl_T *current = compl_first_match->cp_next; compl_T *prev = compl_first_match; inserted = false; @@ -1188,7 +1189,8 @@ static void ins_compl_add_matches(int num_matches, char **matches, int icase) for (int i = 0; i < num_matches && add_r != FAIL; i++) { add_r = ins_compl_add(matches[i], -1, NULL, NULL, false, NULL, dir, - CP_FAST | (icase ? CP_ICASE : 0), false, NULL, 0); + CP_FAST | (icase ? CP_ICASE : 0), false, NULL, + FUZZY_SCORE_NONE); if (add_r == OK) { // If dir was BACKWARD then honor it just once. dir = FORWARD; @@ -1356,7 +1358,7 @@ static int cp_compare_nearest(const void *a, const void *b) { int score_a = ((compl_T *)a)->cp_score; int score_b = ((compl_T *)b)->cp_score; - if (score_a == 0 || score_b == 0) { + if (score_a == FUZZY_SCORE_NONE || score_b == FUZZY_SCORE_NONE) { return 0; } return (score_a > score_b) ? 1 : (score_a < score_b) ? -1 : 0; @@ -1524,7 +1526,7 @@ static int ins_compl_build_pum(void) if (!match_at_original_text(comp) && (leader->data == NULL || ins_compl_equal(comp, leader->data, leader->size) - || (fuzzy_filter && comp->cp_score > 0))) { + || (fuzzy_filter && comp->cp_score != FUZZY_SCORE_NONE))) { // Limit number of items from each source if max_items is set. bool match_limit_exceeded = false; int cur_source = comp->cp_cpt_source_idx; @@ -1863,7 +1865,7 @@ static int thesaurus_add_words_in_line(char *fname, char **buf_arg, int dir, con // Add the word. Skip the regexp match. if (wstart != skip_word) { status = ins_compl_add_infercase(wstart, (int)(ptr - wstart), p_ic, - fname, dir, false, 0); + fname, dir, false, FUZZY_SCORE_NONE); if (status == FAIL) { break; } @@ -1909,7 +1911,8 @@ static void ins_compl_files(int count, char **files, bool thesaurus, int flags, ptr = ctrl_x_mode_line_or_eval() ? find_line_end(ptr) : find_word_end(ptr); int add_r = ins_compl_add_infercase(regmatch->startp[0], (int)(ptr - regmatch->startp[0]), - p_ic, files[i], *dir, false, 0); + p_ic, files[i], *dir, false, + FUZZY_SCORE_NONE); if (thesaurus) { // For a thesaurus, add all the words in the line ptr = buf; @@ -3218,7 +3221,7 @@ static int ins_compl_add_tv(typval_T *const tv, const Direction dir, bool fast) return FAIL; } int status = ins_compl_add((char *)word, -1, NULL, cptext, true, - &user_data, dir, flags, dup, user_hl, 0); + &user_data, dir, flags, dup, user_hl, FUZZY_SCORE_NONE); if (status != OK) { tv_clear(&user_data); } @@ -3314,7 +3317,7 @@ static void set_completion(colnr_T startcol, list_T *list) } if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size, NULL, NULL, false, NULL, 0, - flags | CP_FAST, false, NULL, 0) != OK) { + flags | CP_FAST, false, NULL, FUZZY_SCORE_NONE) != OK) { return; } @@ -4072,7 +4075,7 @@ static void get_next_filename_completion(void) for (int i = 0; i < num_matches; i++) { char *ptr = matches[i]; int score = fuzzy_match_str(ptr, leader); - if (score > 0) { + if (score != FUZZY_SCORE_NONE) { GA_APPEND(int, &fuzzy_indices, i); compl_fuzzy_scores[i] = score; } @@ -4245,7 +4248,7 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_ bool in_fuzzy_collect = (cfc_has_mode() && compl_length > 0) || ((get_cot_flags() & kOptCotFlagFuzzy) && compl_autocomplete); char *leader = ins_compl_leader(); - int score = 0; + int score = FUZZY_SCORE_NONE; const bool in_curbuf = st->ins_buf == curbuf; // If 'infercase' is set, don't use 'smartcase' here @@ -4343,7 +4346,6 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_ if (score < 0) { score = -score; } - score++; } if (ins_compl_add_infercase(ptr, len, p_ic, @@ -4401,7 +4403,8 @@ static void get_register_completion(void) compl_orig_text.size) == 0 : strncmp(str, compl_orig_text.data, compl_orig_text.size) == 0)) { - if (ins_compl_add_infercase(str, str_len, p_ic, NULL, dir, false, 0) == OK) { + if (ins_compl_add_infercase(str, str_len, p_ic, NULL, + dir, false, FUZZY_SCORE_NONE) == OK) { dir = FORWARD; } } @@ -4435,7 +4438,7 @@ static void get_register_completion(void) : strncmp(p, compl_orig_text.data, compl_orig_text.size) == 0))) { if (ins_compl_add_infercase(p, len, p_ic, NULL, - dir, false, 0) == OK) { + dir, false, FUZZY_SCORE_NONE) == OK) { dir = FORWARD; } } @@ -4553,7 +4556,7 @@ static void get_next_bufname_token(void) char *tail = path_tail(b->b_sfname); if (strncmp(tail, compl_orig_text.data, compl_orig_text.size) == 0) { ins_compl_add(tail, (int)strlen(tail), NULL, NULL, false, NULL, 0, - p_ic ? CP_ICASE : 0, false, NULL, 0); + p_ic ? CP_ICASE : 0, false, NULL, FUZZY_SCORE_NONE); } } } @@ -4796,7 +4799,8 @@ static int ins_compl_get_exp(pos_T *ini) // For `^P` completion, reset `compl_curr_match` to the head to avoid // mixing matches from different sources. if (!compl_dir_forward()) { - while (compl_curr_match->cp_prev) { + while (compl_curr_match->cp_prev + && !match_at_original_text(compl_curr_match->cp_prev)) { compl_curr_match = compl_curr_match->cp_prev; } } @@ -5145,7 +5149,7 @@ static int find_next_completion_match(bool allow_get_expansion, int todo, bool a if (!match_at_original_text(compl_shown_match) && leader->data != NULL && !ins_compl_equal(compl_shown_match, leader->data, leader->size) - && !(compl_fuzzy_match && compl_shown_match->cp_score > 0)) { + && !(compl_fuzzy_match && compl_shown_match->cp_score != FUZZY_SCORE_NONE)) { todo++; } else { // Remember a matching item. @@ -5919,7 +5923,7 @@ static int ins_compl_start(void) } if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size, NULL, NULL, false, NULL, 0, - flags, false, NULL, 0) != OK) { + flags, false, NULL, FUZZY_SCORE_NONE) != OK) { API_CLEAR_STRING(compl_pattern); API_CLEAR_STRING(compl_orig_text); kv_destroy(compl_orig_extmarks); diff --git a/src/nvim/mapping.c b/src/nvim/mapping.c @@ -27,6 +27,7 @@ #include "nvim/eval/userfunc.h" #include "nvim/ex_cmds_defs.h" #include "nvim/ex_session.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/getchar.h" @@ -50,7 +51,6 @@ #include "nvim/regexp.h" #include "nvim/regexp_defs.h" #include "nvim/runtime.h" -#include "nvim/search.h" #include "nvim/state_defs.h" #include "nvim/strings.h" #include "nvim/types_defs.h" @@ -1340,7 +1340,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat match = vim_regexec(regmatch, p, 0); } else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) { @@ -1386,7 +1386,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat match = vim_regexec(regmatch, p, 0); } else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) { diff --git a/src/nvim/option.c b/src/nvim/option.c @@ -56,6 +56,7 @@ #include "nvim/ex_getln.h" #include "nvim/ex_session.h" #include "nvim/fold.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/gettext_defs.h" @@ -97,7 +98,6 @@ #include "nvim/regexp.h" #include "nvim/regexp_defs.h" #include "nvim/runtime.h" -#include "nvim/search.h" #include "nvim/spell.h" #include "nvim/spellfile.h" #include "nvim/spellsuggest.h" @@ -5578,7 +5578,7 @@ static bool match_str(char *const str, regmatch_T *const regmatch, char **const } } else { const int score = fuzzy_match_str(str, fuzzystr); - if (score != 0) { + if (score != FUZZY_SCORE_NONE) { if (!test_only) { fuzmatch[idx].idx = idx; fuzmatch[idx].str = xstrdup(str); diff --git a/src/nvim/popupmenu.c b/src/nvim/popupmenu.c @@ -21,8 +21,7 @@ #include "nvim/eval/typval.h" #include "nvim/ex_cmds.h" #include "nvim/ex_cmds_defs.h" -#include "nvim/extmark.h" -#include "nvim/extmark_defs.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/getchar.h" @@ -33,22 +32,18 @@ #include "nvim/highlight_defs.h" #include "nvim/insexpand.h" #include "nvim/keycodes.h" -#include "nvim/mark.h" #include "nvim/mbyte.h" -#include "nvim/memline.h" #include "nvim/memory.h" #include "nvim/memory_defs.h" #include "nvim/menu.h" #include "nvim/message.h" #include "nvim/move.h" -#include "nvim/ops.h" #include "nvim/option.h" #include "nvim/option_defs.h" #include "nvim/option_vars.h" #include "nvim/plines.h" #include "nvim/popupmenu.h" #include "nvim/pos_defs.h" -#include "nvim/search.h" #include "nvim/state_defs.h" #include "nvim/strings.h" #include "nvim/types_defs.h" diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c @@ -35,6 +35,7 @@ #include "nvim/extmark.h" #include "nvim/fileio.h" #include "nvim/fold.h" +#include "nvim/fuzzy.h" #include "nvim/garray.h" #include "nvim/garray_defs.h" #include "nvim/gettext_defs.h" @@ -5487,7 +5488,7 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp { bool found_match = false; size_t pat_len = strlen(spat); - pat_len = MIN(pat_len, MAX_FUZZY_MATCHES); + pat_len = MIN(pat_len, FUZZY_MATCH_MAX_LEN); for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) { colnr_T col = 0; @@ -5533,12 +5534,12 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp char *const str = ml_get_buf(buf, lnum); const colnr_T linelen = ml_get_buf_len(buf, lnum); int score; - uint32_t matches[MAX_FUZZY_MATCHES]; + uint32_t matches[FUZZY_MATCH_MAX_LEN]; const size_t sz = sizeof(matches) / sizeof(matches[0]); // Fuzzy string match CLEAR_FIELD(matches); - while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz, true) > 0) { + while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) { // Pass the buffer number so that it gets used even for a // dummy buffer, unless duplicate_name is set, then the // buffer will be wiped out below. diff --git a/src/nvim/search.c b/src/nvim/search.c @@ -29,8 +29,6 @@ #include "nvim/file_search.h" #include "nvim/fileio.h" #include "nvim/fold.h" -#include "nvim/garray.h" -#include "nvim/garray_defs.h" #include "nvim/getchar.h" #include "nvim/gettext_defs.h" #include "nvim/globals.h" @@ -2910,925 +2908,6 @@ the_end: restore_incsearch_state(); } -/// Fuzzy string matching -/// -/// Ported from the lib_fts library authored by Forrest Smith. -/// https://github.com/forrestthewoods/lib_fts/tree/master/code -/// -/// The following blog describes the fuzzy matching algorithm: -/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ -/// -/// Each matching string is assigned a score. The following factors are checked: -/// - Matched letter -/// - Unmatched letter -/// - Consecutively matched letters -/// - Proximity to start -/// - Letter following a separator (space, underscore) -/// - Uppercase letter following lowercase (aka CamelCase) -/// -/// Matched letters are good. Unmatched letters are bad. Matching near the start -/// is good. Matching the first letter in the middle of a phrase is good. -/// Matching the uppercase letters in camel case entries is good. -/// -/// The score assigned for each factor is explained below. -/// File paths are different from file names. File extensions may be ignorable. -/// Single words care about consecutive matches but not separators or camel -/// case. -/// Score starts at 100 -/// Matched letter: +0 points -/// Unmatched letter: -1 point -/// Consecutive match bonus: +15 points -/// First letter bonus: +15 points -/// Separator bonus: +30 points -/// Camel case bonus: +30 points -/// Unmatched leading letter: -5 points (max: -15) -/// -/// There is some nuance to this. Scores don’t have an intrinsic meaning. The -/// score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a -/// lower minimum score due to unmatched letter penalty. Longer search patterns -/// have a higher maximum score due to match bonuses. -/// -/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth -/// quite a bit. -/// -/// There is a penalty if you DON’T match the first three letters. Which -/// effectively rewards matching near the start. However there’s no difference -/// in matching between the middle and end. -/// -/// There is not an explicit bonus for an exact match. Unmatched letters receive -/// a penalty. So shorter strings and closer matches are worth more. -typedef struct { - int idx; ///< used for stable sort - listitem_T *item; - int score; - list_T *lmatchpos; -} fuzzyItem_T; - -/// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that -/// matching a whole word is preferred. -#define SEQUENTIAL_BONUS 40 -/// bonus if match occurs after a path separator -#define PATH_SEPARATOR_BONUS 30 -/// bonus if match occurs after a word separator -#define WORD_SEPARATOR_BONUS 25 -/// bonus if match is uppercase and prev is lower -#define CAMEL_BONUS 30 -/// bonus if the first letter is matched -#define FIRST_LETTER_BONUS 15 -/// bonus if exact match -#define EXACT_MATCH_BONUS 100 -/// bonus if case match when no ignorecase -#define CASE_MATCH_BONUS 25 -/// penalty applied for every letter in str before the first match -#define LEADING_LETTER_PENALTY (-5) -/// maximum penalty for leading letters -#define MAX_LEADING_LETTER_PENALTY (-15) -/// penalty for every letter that doesn't match -#define UNMATCHED_LETTER_PENALTY (-1) -/// penalty for gap in matching positions (-2 * k) -#define GAP_PENALTY (-2) -/// Score for a string that doesn't fuzzy match the pattern -#define SCORE_NONE (-9999) - -#define FUZZY_MATCH_RECURSION_LIMIT 10 - -/// Compute a score for a fuzzy matched string. The matching character locations -/// are in "matches". -static int fuzzy_match_compute_score(const char *const fuzpat, const char *const str, - const int strSz, const uint32_t *const matches, - const int numMatches, bool camelcase) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE -{ - assert(numMatches > 0); // suppress clang "result of operation is garbage" - const char *p = str; - uint32_t sidx = 0; - bool is_exact_match = true; - const char *const orig_fuzpat = fuzpat - numMatches; - const char *curpat = orig_fuzpat; - int pat_idx = 0; - // Track consecutive camel case matches - int consecutive_camel = 0; - - // Initialize score - int score = 100; - - // Apply leading letter penalty - int penalty = LEADING_LETTER_PENALTY * (int)matches[0]; - if (penalty < MAX_LEADING_LETTER_PENALTY) { - penalty = MAX_LEADING_LETTER_PENALTY; - } - score += penalty; - - // Apply unmatched penalty - const int unmatched = strSz - numMatches; - score += UNMATCHED_LETTER_PENALTY * unmatched; - // In a long string, not all matches may be found due to the recursion limit. - // If at least one match is found, reset the score to a non-negative value. - if (score < 0 && numMatches > 0) { - score = 0; - } - - // Apply ordering bonuses - for (int i = 0; i < numMatches; i++) { - const uint32_t currIdx = matches[i]; - bool is_camel = false; - - if (i > 0) { - const uint32_t prevIdx = matches[i - 1]; - - // Sequential - if (currIdx == prevIdx + 1) { - score += SEQUENTIAL_BONUS; - } else { - score += GAP_PENALTY * (int)(currIdx - prevIdx); - // Reset consecutive camel count on gap - consecutive_camel = 0; - } - } - - int curr; - // Check for bonuses based on neighbor character value - if (currIdx > 0) { - // Camel case - int neighbor = ' '; - - while (sidx < currIdx) { - neighbor = utf_ptr2char(p); - MB_PTR_ADV(p); - sidx++; - } - curr = utf_ptr2char(p); - - // Enhanced camel case scoring - if (camelcase && mb_islower(neighbor) && mb_isupper(curr)) { - score += CAMEL_BONUS * 2; // Double the camel case bonus - is_camel = true; - consecutive_camel++; - // Additional bonus for consecutive camel - if (consecutive_camel > 1) { - score += CAMEL_BONUS; - } - } else { - consecutive_camel = 0; - } - - // Bonus if the match follows a separator character - if (neighbor == '/' || neighbor == '\\') { - score += PATH_SEPARATOR_BONUS; - } else if (neighbor == ' ' || neighbor == '_') { - score += WORD_SEPARATOR_BONUS; - } - } else { - // First letter - score += FIRST_LETTER_BONUS; - curr = utf_ptr2char(p); - } - - // Case matching bonus - if (mb_isalpha(curr)) { - while (pat_idx < i && *curpat) { - MB_PTR_ADV(curpat); - pat_idx++; - } - - if (curr == utf_ptr2char(curpat)) { - score += CASE_MATCH_BONUS; - // Extra bonus for exact case match in camel - if (is_camel) { - score += CASE_MATCH_BONUS / 2; - } - } - } - - // Check exact match condition - if (currIdx != (uint32_t)i) { - is_exact_match = false; - } - } - - // Boost score for exact matches - if (is_exact_match && numMatches == strSz) { - score += EXACT_MATCH_BONUS; - } - - return score; -} - -/// Perform a recursive search for fuzzy matching "fuzpat" in "str". -/// @return the number of matching characters. -static int fuzzy_match_recursive(const char *fuzpat, const char *str, uint32_t strIdx, - int *const outScore, const char *const strBegin, const int strLen, - const uint32_t *const srcMatches, uint32_t *const matches, - const int maxMatches, int nextMatch, int *const recursionCount, - bool camelcase) - FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT -{ - // Recursion params - bool recursiveMatch = false; - uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES]; - int bestRecursiveScore = 0; - - // Count recursions - (*recursionCount)++; - if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) { - return 0; - } - - // Detect end of strings - if (*fuzpat == NUL || *str == NUL) { - return 0; - } - - // Loop through fuzpat and str looking for a match - bool first_match = true; - while (*fuzpat != NUL && *str != NUL) { - const int c1 = utf_ptr2char(fuzpat); - const int c2 = utf_ptr2char(str); - - // Found match - if (mb_tolower(c1) == mb_tolower(c2)) { - // Supplied matches buffer was too short - if (nextMatch >= maxMatches) { - return 0; - } - - int recursiveScore = 0; - uint32_t recursiveMatches[MAX_FUZZY_MATCHES]; - CLEAR_FIELD(recursiveMatches); - - // "Copy-on-Write" srcMatches into matches - if (first_match && srcMatches != NULL) { - memcpy(matches, srcMatches, (size_t)nextMatch * sizeof(srcMatches[0])); - first_match = false; - } - - // Recursive call that "skips" this match - const char *const next_char = str + utfc_ptr2len(str); - if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen, - matches, recursiveMatches, - sizeof(recursiveMatches) / sizeof(recursiveMatches[0]), nextMatch, - recursionCount, camelcase)) { - // Pick best recursive score - if (!recursiveMatch || recursiveScore > bestRecursiveScore) { - memcpy(bestRecursiveMatches, recursiveMatches, - MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); - bestRecursiveScore = recursiveScore; - } - recursiveMatch = true; - } - - // Advance - matches[nextMatch++] = strIdx; - MB_PTR_ADV(fuzpat); - } - MB_PTR_ADV(str); - strIdx++; - } - - // Determine if full fuzpat was matched - const bool matched = *fuzpat == NUL; - - // Calculate score - if (matched) { - *outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, nextMatch, camelcase); - } - - // Return best result - if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) { - // Recursive score is better than "this" - memcpy(matches, bestRecursiveMatches, (size_t)maxMatches * sizeof(matches[0])); - *outScore = bestRecursiveScore; - return nextMatch; - } else if (matched) { - return nextMatch; // "this" score is better than recursive - } - - return 0; // no match -} - -/// fuzzy_match() -/// -/// Performs exhaustive search via recursion to find all possible matches and -/// match with highest score. -/// Scores values have no intrinsic meaning. Possible score range is not -/// normalized and varies with pattern. -/// Recursion is limited internally (default=10) to prevent degenerate cases -/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). -/// Patterns are limited to MAX_FUZZY_MATCHES characters. -/// -/// @return true if "pat_arg" matches "str". Also returns the match score in -/// "outScore" and the matching character positions in "matches". -bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq, - int *const outScore, uint32_t *const matches, const int maxMatches, bool camelcase) - FUNC_ATTR_NONNULL_ALL -{ - const int len = mb_charlen(str); - bool complete = false; - int numMatches = 0; - - *outScore = 0; - - char *const save_pat = xstrdup(pat_arg); - char *pat = save_pat; - char *p = pat; - - // Try matching each word in "pat_arg" in "str" - while (true) { - if (matchseq) { - complete = true; - } else { - // Extract one word from the pattern (separated by space) - p = skipwhite(p); - if (*p == NUL) { - break; - } - pat = p; - while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) { - MB_PTR_ADV(p); - } - if (*p == NUL) { // processed all the words - complete = true; - } - *p = NUL; - } - - int score = 0; - int recursionCount = 0; - const int matchCount - = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, - matches + numMatches, - maxMatches - numMatches, 0, &recursionCount, camelcase); - if (matchCount == 0) { - numMatches = 0; - break; - } - - // Accumulate the match score and the number of matches - *outScore += score; - numMatches += matchCount; - - if (complete) { - break; - } - - // try matching the next word - p++; - } - - xfree(save_pat); - return numMatches != 0; -} - -/// Sort the fuzzy matches in the descending order of the match score. -/// For items with same score, retain the order using the index (stable sort) -static int fuzzy_match_item_compare(const void *const s1, const void *const s2) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE -{ - const int v1 = ((const fuzzyItem_T *)s1)->score; - const int v2 = ((const fuzzyItem_T *)s2)->score; - const int idx1 = ((const fuzzyItem_T *)s1)->idx; - const int idx2 = ((const fuzzyItem_T *)s2)->idx; - - if (v1 == v2) { - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - } - return v1 > v2 ? -1 : 1; -} - -/// Fuzzy search the string "str" in a list of "items" and return the matching -/// strings in "fmatchlist". -/// If "matchseq" is true, then for multi-word search strings, match all the -/// words in sequence. -/// If "items" is a list of strings, then search for "str" in the list. -/// If "items" is a list of dicts, then either use "key" to lookup the string -/// for each item or use "item_cb" Funcref function to get the string. -/// If "retmatchpos" is true, then return a list of positions where "str" -/// matches for each item. -static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq, - const char *const key, Callback *const item_cb, - const bool retmatchpos, list_T *const fmatchlist, - const int max_matches, bool camelcase) - FUNC_ATTR_NONNULL_ARG(2, 5, 7) -{ - int len = tv_list_len(l); - if (len == 0) { - return; - } - if (max_matches > 0 && len > max_matches) { - len = max_matches; - } - - fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T)); - int match_count = 0; - uint32_t matches[MAX_FUZZY_MATCHES]; - - // For all the string items in items, get the fuzzy matching score - TV_LIST_ITER(l, li, { - if (max_matches > 0 && match_count >= max_matches) { - break; - } - - char *itemstr = NULL; - typval_T rettv; - rettv.v_type = VAR_UNKNOWN; - const typval_T *const tv = TV_LIST_ITEM_TV(li); - if (tv->v_type == VAR_STRING) { // list of strings - itemstr = tv->vval.v_string; - } else if (tv->v_type == VAR_DICT && (key != NULL || item_cb->type != kCallbackNone)) { - // For a dict, either use the specified key to lookup the string or - // use the specified callback function to get the string. - if (key != NULL) { - itemstr = tv_dict_get_string(tv->vval.v_dict, key, false); - } else { - typval_T argv[2]; - - // Invoke the supplied callback (if any) to get the dict item - tv->vval.v_dict->dv_refcount++; - argv[0].v_type = VAR_DICT; - argv[0].vval.v_dict = tv->vval.v_dict; - argv[1].v_type = VAR_UNKNOWN; - if (callback_call(item_cb, 1, argv, &rettv)) { - if (rettv.v_type == VAR_STRING) { - itemstr = rettv.vval.v_string; - } - } - tv_dict_unref(tv->vval.v_dict); - } - } - - int score; - if (itemstr != NULL && fuzzy_match(itemstr, str, matchseq, &score, matches, - MAX_FUZZY_MATCHES, camelcase)) { - items[match_count].idx = (int)match_count; - items[match_count].item = li; - items[match_count].score = score; - - // Copy the list of matching positions in itemstr to a list, if - // "retmatchpos" is set. - if (retmatchpos) { - items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow); - int j = 0; - const char *p = str; - while (*p != NUL) { - if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) { - tv_list_append_number(items[match_count].lmatchpos, matches[j]); - j++; - } - MB_PTR_ADV(p); - } - } - match_count++; - } - tv_clear(&rettv); - }); - - if (match_count > 0) { - // Sort the list by the descending order of the match score - qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare); - - // For matchfuzzy(), return a list of matched strings. - // ['str1', 'str2', 'str3'] - // For matchfuzzypos(), return a list with three items. - // The first item is a list of matched strings. The second item - // is a list of lists where each list item is a list of matched - // character positions. The third item is a list of matching scores. - // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] - list_T *retlist; - if (retmatchpos) { - const listitem_T *const li = tv_list_find(fmatchlist, 0); - assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); - retlist = TV_LIST_ITEM_TV(li)->vval.v_list; - } else { - retlist = fmatchlist; - } - - // Copy the matching strings with a valid score to the return list - for (int i = 0; i < match_count; i++) { - if (items[i].score == SCORE_NONE) { - break; - } - tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item)); - } - - // next copy the list of matching positions - if (retmatchpos) { - const listitem_T *li = tv_list_find(fmatchlist, -2); - assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); - retlist = TV_LIST_ITEM_TV(li)->vval.v_list; - - for (int i = 0; i < match_count; i++) { - if (items[i].score == SCORE_NONE) { - break; - } - tv_list_append_list(retlist, items[i].lmatchpos); - } - - // copy the matching scores - li = tv_list_find(fmatchlist, -1); - assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); - retlist = TV_LIST_ITEM_TV(li)->vval.v_list; - for (int i = 0; i < match_count; i++) { - if (items[i].score == SCORE_NONE) { - break; - } - tv_list_append_number(retlist, items[i].score); - } - } - } - xfree(items); -} - -/// Do fuzzy matching. Returns the list of matched strings in "rettv". -/// If "retmatchpos" is true, also returns the matching character positions. -static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, - const bool retmatchpos) - FUNC_ATTR_NONNULL_ALL -{ - // validate and get the arguments - if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) { - semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); - return; - } - if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { - semsg(_(e_invarg2), tv_get_string(&argvars[1])); - return; - } - - Callback cb = CALLBACK_NONE; - const char *key = NULL; - bool matchseq = false; - int max_matches = 0; - bool camelcase = true; - if (argvars[2].v_type != VAR_UNKNOWN) { - if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) { - return; - } - - // To search a dict, either a callback function or a key can be - // specified. - dict_T *const d = argvars[2].vval.v_dict; - const dictitem_T *di; - if ((di = tv_dict_find(d, "key", -1)) != NULL) { - if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL - || *di->di_tv.vval.v_string == NUL) { - semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv)); - return; - } - key = tv_get_string(&di->di_tv); - } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) { - semsg(_(e_invargval), "text_cb"); - return; - } - - if ((di = tv_dict_find(d, "limit", -1)) != NULL) { - if (di->di_tv.v_type != VAR_NUMBER) { - semsg(_(e_invargval), "limit"); - return; - } - max_matches = (int)tv_get_number_chk(&di->di_tv, NULL); - } - - if ((di = tv_dict_find(d, "camelcase", -1)) != NULL) { - if (di->di_tv.v_type != VAR_BOOL) { - semsg(_(e_invargval), "camelcase"); - return; - } - camelcase = tv_get_bool_chk(&di->di_tv, NULL); - } - - if (tv_dict_find(d, "matchseq", -1) != NULL) { - matchseq = true; - } - } - - // get the fuzzy matches - tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown); - if (retmatchpos) { - // For matchfuzzypos(), a list with three items are returned. First - // item is a list of matching strings, the second item is a list of - // lists with matching positions within each string and the third item - // is the list of scores of the matches. - tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); - tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); - tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); - } - - fuzzy_match_in_list(argvars[0].vval.v_list, - (char *)tv_get_string(&argvars[1]), matchseq, key, - &cb, retmatchpos, rettv->vval.v_list, max_matches, camelcase); - callback_free(&cb); -} - -/// "matchfuzzy()" function -void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) -{ - do_fuzzymatch(argvars, rettv, false); -} - -/// "matchfuzzypos()" function -void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) -{ - do_fuzzymatch(argvars, rettv, true); -} - -/// Same as fuzzy_match_item_compare() except for use with a string match -static int fuzzy_match_str_compare(const void *const s1, const void *const s2) - FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE -{ - const int v1 = ((fuzmatch_str_T *)s1)->score; - const int v2 = ((fuzmatch_str_T *)s2)->score; - const int idx1 = ((fuzmatch_str_T *)s1)->idx; - const int idx2 = ((fuzmatch_str_T *)s2)->idx; - - if (v1 == v2) { - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - } else { - return v1 > v2 ? -1 : 1; - } -} - -/// Sort fuzzy matches by score -static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz) - FUNC_ATTR_NONNULL_ALL -{ - // Sort the list by the descending order of the match score - qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare); -} - -/// Same as fuzzy_match_item_compare() except for use with a function name -/// string match. <SNR> functions should be sorted to the end. -static int fuzzy_match_func_compare(const void *const s1, const void *const s2) - FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE -{ - const int v1 = ((fuzmatch_str_T *)s1)->score; - const int v2 = ((fuzmatch_str_T *)s2)->score; - const int idx1 = ((fuzmatch_str_T *)s1)->idx; - const int idx2 = ((fuzmatch_str_T *)s2)->idx; - const char *const str1 = ((fuzmatch_str_T *)s1)->str; - const char *const str2 = ((fuzmatch_str_T *)s2)->str; - - if (*str1 != '<' && *str2 == '<') { - return -1; - } - if (*str1 == '<' && *str2 != '<') { - return 1; - } - if (v1 == v2) { - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - } - return v1 > v2 ? -1 : 1; -} - -/// Sort fuzzy matches of function names by score. -/// <SNR> functions should be sorted to the end. -static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz) - FUNC_ATTR_NONNULL_ALL -{ - // Sort the list by the descending order of the match score - qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare); -} - -/// Fuzzy match "pat" in "str". -/// @returns 0 if there is no match. Otherwise, returns the match score. -int fuzzy_match_str(char *const str, const char *const pat) - FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (str == NULL || pat == NULL) { - return 0; - } - - int score = 0; - uint32_t matchpos[MAX_FUZZY_MATCHES]; - fuzzy_match(str, pat, true, &score, matchpos, sizeof(matchpos) / sizeof(matchpos[0]), true); - - return score; -} - -/// Fuzzy match the position of string "pat" in string "str". -/// @returns a dynamic array of matching positions. If there is no match, returns NULL. -garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat) -{ - if (str == NULL || pat == NULL) { - return NULL; - } - - garray_T *match_positions = xmalloc(sizeof(garray_T)); - ga_init(match_positions, sizeof(uint32_t), 10); - - unsigned matches[MAX_FUZZY_MATCHES]; - int score = 0; - if (!fuzzy_match(str, pat, false, &score, matches, MAX_FUZZY_MATCHES, true) - || score == 0) { - ga_clear(match_positions); - xfree(match_positions); - return NULL; - } - - int j = 0; - for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) { - if (!ascii_iswhite(utf_ptr2char(p))) { - GA_APPEND(uint32_t, match_positions, matches[j]); - j++; - } - } - - return match_positions; -} - -/// This function splits the line pointed to by `*ptr` into words and performs -/// a fuzzy match for the pattern `pat` on each word. It iterates through the -/// line, moving `*ptr` to the start of each word during the process. -/// -/// If a match is found: -/// - `*ptr` points to the start of the matched word. -/// - `*len` is set to the length of the matched word. -/// - `*score` contains the match score. -/// -/// If no match is found, `*ptr` is updated to the end of the line. -bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score) -{ - char *str = *ptr; - char *strBegin = str; - char *end = NULL; - char *start = NULL; - bool found = false; - - if (str == NULL || pat == NULL) { - return found; - } - char *line_end = find_line_end(str); - - while (str < line_end) { - // Skip non-word characters - start = find_word_start(str); - if (*start == NUL) { - break; - } - end = find_word_end(start); - - // Extract the word from start to end - char save_end = *end; - *end = NUL; - - // Perform fuzzy match - *score = fuzzy_match_str(start, pat); - *end = save_end; - - if (*score > 0) { - *len = (int)(end - start); - found = true; - *ptr = start; - if (current_pos) { - current_pos->col += (int)(end - strBegin); - } - break; - } - - // Move to the end of the current word for the next iteration - str = end; - // Ensure we continue searching after the current word - while (*str != NUL && !vim_iswordp(str)) { - MB_PTR_ADV(str); - } - } - - if (!found) { - *ptr = line_end; - } - - return found; -} - -/// Search for the next fuzzy match in the specified buffer. -/// This function attempts to find the next occurrence of the given pattern -/// in the buffer, starting from the current position. It handles line wrapping -/// and direction of search. -/// -/// Return true if a match is found, otherwise false. -bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos, - int *len, char **ptr, int *score) -{ - pos_T current_pos = *pos; - pos_T circly_end; - bool found_new_match = false; - bool looped_around = false; - - bool whole_line = ctrl_x_mode_whole_line(); - - if (buf == curbuf) { - circly_end = *start_pos; - } else { - circly_end.lnum = buf->b_ml.ml_line_count; - circly_end.col = 0; - circly_end.coladd = 0; - } - - if (whole_line && start_pos->lnum != pos->lnum) { - current_pos.lnum += dir; - } - - while (true) { - // Check if looped around and back to start position - if (looped_around && equalpos(current_pos, circly_end)) { - break; - } - - // Ensure current_pos is valid - if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) { - // Get the current line buffer - *ptr = ml_get_buf(buf, current_pos.lnum); - if (!whole_line) { - *ptr += current_pos.col; - } - - // If ptr is end of line is reached, move to next line - // or previous line based on direction - if (*ptr != NULL && **ptr != NUL) { - if (!whole_line) { - // Try to find a fuzzy match in the current line starting - // from current position - found_new_match = fuzzy_match_str_in_line(ptr, pattern, - len, &current_pos, score); - if (found_new_match) { - *pos = current_pos; - break; - } else if (looped_around && current_pos.lnum == circly_end.lnum) { - break; - } - } else { - if (fuzzy_match_str(*ptr, pattern) > 0) { - found_new_match = true; - *pos = current_pos; - *len = ml_get_buf_len(buf, current_pos.lnum); - break; - } - } - } - } - - // Move to the next line or previous line based on direction - if (dir == FORWARD) { - if (++current_pos.lnum > buf->b_ml.ml_line_count) { - if (p_ws) { - current_pos.lnum = 1; - looped_around = true; - } else { - break; - } - } - } else { - if (--current_pos.lnum < 1) { - if (p_ws) { - current_pos.lnum = buf->b_ml.ml_line_count; - looped_around = true; - } else { - break; - } - } - } - current_pos.col = 0; - } - - return found_new_match; -} - -/// Copy a list of fuzzy matches into a string list after sorting the matches by -/// the fuzzy score. Frees the memory allocated for "fuzmatch". -void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches, - const int count, const bool funcsort) - FUNC_ATTR_NONNULL_ARG(2) -{ - if (count <= 0) { - return; - } - - *matches = xmalloc((size_t)count * sizeof(char *)); - - // Sort the list by the descending order of the match score - if (funcsort) { - fuzzy_match_func_sort(fuzmatch, count); - } else { - fuzzy_match_str_sort(fuzmatch, count); - } - - for (int i = 0; i < count; i++) { - (*matches)[i] = fuzmatch[i].str; - } - xfree(fuzmatch); -} - -/// Free a list of fuzzy string matches. -void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count) -{ - if (count <= 0 || fuzmatch == NULL) { - return; - } - while (count--) { - xfree(fuzmatch[count].str); - } - xfree(fuzmatch); -} - /// Get line "lnum" and copy it into "buf[LSIZE]". /// The copy is made because the regexp may make the line invalid when using a /// mark. diff --git a/src/nvim/search.h b/src/nvim/search.h @@ -67,11 +67,6 @@ enum { SEARCH_STAT_DEF_TIMEOUT = 40, }; // '[>9999/>9999]': 13 + 1 (NUL) enum { SEARCH_STAT_BUF_LEN = 16, }; -enum { - /// Maximum number of characters that can be fuzzy matched - MAX_FUZZY_MATCHES = 256, -}; - /// Structure containing offset definition for the last search pattern /// /// @note Only offset for the last search pattern is used, not for the last @@ -112,14 +107,6 @@ typedef struct { int last_maxcount; // the max count of the last search } searchstat_T; -/// Fuzzy matched string list item. Used for fuzzy match completion. Items are -/// usually sorted by "score". The "idx" member is used for stable-sort. -typedef struct { - int idx; - char *str; - int score; -} fuzmatch_str_T; - #ifdef INCLUDE_GENERATED_DECLARATIONS # include "search.h.generated.h" #endif diff --git a/test/old/testdir/test_cmdline.vim b/test/old/testdir/test_cmdline.vim @@ -3262,7 +3262,7 @@ endfunc func Test_fuzzy_completion_bufname_fullpath() CheckUnix set wildoptions& - call mkdir('Xcmd/Xstate/Xfile.js', 'p') + call mkdir('Xcmd/Xstate/Xfile.js', 'pR') edit Xcmd/Xstate/Xfile.js cd Xcmd/Xstate enew @@ -3270,9 +3270,8 @@ func Test_fuzzy_completion_bufname_fullpath() call assert_equal('"b CmdStateFile', @:) set wildoptions=fuzzy call feedkeys(":b CmdStateFile\<Tab>\<C-B>\"\<CR>", 'tx') - call assert_match('Xcmd/Xstate/Xfile.js$', @:) + call assert_equal('"b CmdStateFile', @:) cd - - call delete('Xcmd', 'rf') set wildoptions& endfunc @@ -3551,7 +3550,7 @@ func Test_fuzzy_completion_mapname() nmap <Plug>state : nmap <Plug>FendingOff : call feedkeys(":nmap <Plug>fo\<C-A>\<C-B>\"\<CR>", 'tx') - call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>goformat <Plug>fendoff", @:) + call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>fendoff <Plug>goformat", @:) nunmap <Plug>format nunmap <Plug>goformat nunmap <Plug>TestFOrmat @@ -3673,9 +3672,7 @@ func Test_fuzzy_completion_syntax_group() call assert_equal('"syntax list mpar', @:) set wildoptions=fuzzy call feedkeys(":syntax list mpar\<Tab>\<C-B>\"\<CR>", 'tx') - " Fuzzy match prefers NvimParenthesis over MatchParen - " call assert_equal('"syntax list MatchParen', @:) - call assert_equal('"syntax list NvimParenthesis', @:) + call assert_equal('"syntax list MatchParen', @:) set wildoptions& endfunc @@ -3726,7 +3723,7 @@ func Test_fuzzy_completion_cmd_sort_results() command T123FendingOff : set wildoptions=fuzzy call feedkeys(":T123fo\<C-A>\<C-B>\"\<CR>", 'tx') - call assert_equal('"T123format T123TestFOrmat T123FendingOff T123goformat T123fendoff', @:) + call assert_equal('"T123format T123TestFOrmat T123FendingOff T123fendoff T123goformat', @:) delcommand T123format delcommand T123goformat delcommand T123TestFOrmat diff --git a/test/old/testdir/test_matchfuzzy.vim b/test/old/testdir/test_matchfuzzy.vim @@ -18,11 +18,11 @@ func Test_matchfuzzy() call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['oneTwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal([], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + call assert_equal([repeat('a', 300)], matchfuzzy([repeat('a', 300)], repeat('a', 257))) " full match has highest score call assert_equal(['Cursor', 'lCursor'], matchfuzzy(["hello", "lCursor", "Cursor"], "Cursor")) " matches with same score should not be reordered @@ -30,8 +30,7 @@ func Test_matchfuzzy() call assert_equal(l, l->matchfuzzy('abc')) " Tests for match preferences - " preference for camel case match - call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + call assert_equal(['onetwo', 'oneTwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) " preference for match after a separator (_ or space) call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) " preference for leading letter match @@ -47,7 +46,7 @@ func Test_matchfuzzy() " gap penalty call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) " path separator vs word separator - call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) + call assert_equal(['color/setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim', 'color\\setup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim')) " match multiple words (separated by space) call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) @@ -92,15 +91,16 @@ func Test_matchfuzzy() call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) call assert_equal(['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) + \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) call assert_equal(['things', 'sThings', 'thisThings'], - \ matchfuzzy(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzy([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + \ matchfuzzy(['things','sThings', 'thisThings'], 'thin')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) + call assert_equal(['MyTest'], matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) + call assert_equal(['CamelCaseMatchIngAlg'], + \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], 'CamelCase')) + call assert_equal(['SomeWord', 'PrefixSomeWord'], matchfuzzy(['PrefixSomeWord', 'SomeWord'], 'somewor')) " Test in latin1 encoding let save_enc = &encoding @@ -112,50 +112,57 @@ endfunc " Test for the matchfuzzypos() function func Test_matchfuzzypos() - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl')) - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) call assert_equal([['hello', 'hello world hello world'], - \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [2147483647, 4810]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) - call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [2880]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [885]], matchfuzzypos(['a b'], ' a ')) call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + + " fuzzy matches limited to 1024 chars + let matches = matchfuzzypos([repeat('a', 1030)], repeat('a', 1025)) + call assert_equal([repeat('a', 1030)], matches[0]) + call assert_equal(1024, len(matches[1][0])) + call assert_equal(1023, matches[1][0][1023]) + call assert_equal([[], [], []], matchfuzzypos([], 'abc')) + call assert_equal([[], [], []], matchfuzzypos(['abc'], '')) " match in a long string - call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [155]], + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) " preference for camel case match - call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc')) + call assert_equal([['xabcxxaBc'], [[7, 8]], [1665]], matchfuzzypos(['xabcxxaBc'], 'bc')) " preference for match after a separator (_ or space) - call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab')) + call assert_equal([['xabx_ab'], [[5, 6]], [1775]], matchfuzzypos(['xabx_ab'], 'ab')) " preference for leading letter match - call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab')) + call assert_equal([['abcxabc'], [[0, 1]], [1875]], matchfuzzypos(['abcxabc'], 'ab')) " preference for sequential match - call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one')) + call assert_equal([['aobncedone'], [[7, 8, 9]], [1965]], matchfuzzypos(['aobncedone'], 'one')) " best recursive match - call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one')) + call assert_equal([['xoone'], [[2, 3, 4]], [1990]], matchfuzzypos(['xoone'], 'one')) " match multiple words (separated by space) - call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1})) - call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) - call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) + call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) + call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 9, 10]], [6660]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t ")) - call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace')) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [2147483647]], ['grace']->matchfuzzypos('race ace grace')) let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'key' : 'val'})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) @@ -175,30 +182,17 @@ func Test_matchfuzzypos() " call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') " case match - call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma')) - call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma')) - " CamelCase has high weight even case match - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) - call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM')) + call assert_equal([['Match'], [[0, 1]], [1885]], matchfuzzypos(['match', 'Match'], 'Ma')) + call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [1885, 1885]], matchfuzzypos(['Match', 'match'], 'ma')) let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], + call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [2885, 2870, 2865, 2680, 2670, 2655, 2655]], \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) - call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [267, 264, 263, 246, 239, 236, 231]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) - call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 7]], [333, 287, 279]], - \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzypos([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 6, 7]], [3890, 3685, 3670]], + \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin')) endfunc " Test for matchfuzzy() with multibyte characters @@ -216,9 +210,14 @@ func Test_matchfuzzy_mbyte() call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의')) call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘')) - " preference for camel case match + " camel case match + call assert_equal(['oneąwo', 'oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('oneąwo')) + call assert_equal(['oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('Ąwo')) + " prefer camelcase when searched first character matches upper case call assert_equal(['oneĄwo', 'oneąwo'], - \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + \ ['oneĄwo', 'oneąwo']->matchfuzzy('ąw')) " preference for complete match then match after separator (_ or space) call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) @@ -242,41 +241,39 @@ endfunc " Test for matchfuzzypos() with multibyte characters func Test_matchfuzzypos_mbyte() CheckFeature multi_lang - call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]], + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [4890]], \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) - call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [-20]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) " reverse the order of characters call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) - call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]], + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [2885, 670]], \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], - \ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [1880, 1865, 1850, 890]], \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) - call assert_equal([['ααααααα'], [[0, 1, 2]], [216]], + call assert_equal([['ααααααα'], [[0, 1, 2]], [2880]], \ matchfuzzypos(['ααααααα'], 'ααα')) call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + call assert_equal([repeat('✓', 300)], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))[0]) " match multiple words (separated by space) - call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [4515]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) " match in a long string - call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [105]], + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) - " preference for camel case match - call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) - call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + call assert_equal([['xちだx_ちだ'], [[5, 6]], [1775]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) " preference for leading letter match - call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [1875]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) " preference for sequential match - call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [1965]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) " best recursive match - call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд')) + call assert_equal([['xффйд'], [[2, 3, 4]], [1990]], matchfuzzypos(['xффйд'], 'фйд')) endfunc " Test for matchfuzzy() with limit