commit 9843573a61f42b7f9ea5ded0f5221320ac1925c5
parent 6f904cfef15b06473710977d3656e67ef3a930d0
Author: zeertzjq <zeertzjq@outlook.com>
Date: Thu, 14 Aug 2025 07:18:05 +0800
Merge pull request #35320 from zeertzjq/vim-9.1.1627
vim-patch:9.1.{1627,1628}
Diffstat:
19 files changed, 1104 insertions(+), 1082 deletions(-)
diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt
@@ -327,6 +327,8 @@ These existing features changed their behavior.
• |$VIM| and |$VIMRUNTIME| no longer check for Vim version-specific runtime
directory `vim{number}` (e.g. `vim82`).
• 'scrollback' maximum value increased from 100000 to 1000000
+• |matchfuzzy()| and |matchfuzzypos()| use an improved fuzzy matching algorithm
+ (same as fzy).
==============================================================================
REMOVED FEATURES *news-removed*
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
@@ -1481,34 +1481,51 @@ Finally, these constructs are unique to Perl:
==============================================================================
11. Fuzzy matching *fuzzy-matching*
-Fuzzy matching refers to matching strings using a non-exact search string.
-Fuzzy matching will match a string, if all the characters in the search string
-are present anywhere in the string in the same order. Case is ignored. In a
-matched string, other characters can be present between two consecutive
-characters in the search string. If the search string has multiple words, then
-each word is matched separately. So the words in the search string can be
-present in any order in a string.
-
-Fuzzy matching assigns a score for each matched string based on the following
-criteria:
- - The number of sequentially matching characters.
- - The number of characters (distance) between two consecutive matching
- characters.
- - Matches at the beginning of a word
- - Matches at a camel case character (e.g. Case in CamelCase)
- - Matches after a path separator or a hyphen.
- - The number of unmatched characters in a string.
- - A full/exact match is preferred.
-The matching string with the highest score is returned first.
-
-For example, when you search for the "get pat" string using fuzzy matching, it
-will match the strings "GetPattern", "PatternGet", "getPattern", "patGetter",
-"getSomePattern", "MatchpatternGet" etc.
-
-The functions |matchfuzzy()| and |matchfuzzypos()| can be used to fuzzy search
-a string in a List of strings. The matchfuzzy() function returns a List of
-matching strings. The matchfuzzypos() functions returns the List of matches,
-the matching positions and the fuzzy match scores.
+Fuzzy matching scores how well a string matches a pattern when the pattern
+characters appear in order but not necessarily contiguously.
+
+Example: >
+ Pattern: "vim"
+ Candidates: "vim" -> perfect
+ "vimeo" -> good (v i m)
+ "voice mail" -> weaker (v _ i _ _ _ m)
+ "vintage" -> no match (no "m")
+<
+If the search string has multiple words, each word is matched separately and
+may appear in any order in the candidate. For example "get pat" matches
+"GetPattern", "PatternGet", "getPattern", "patGetter", "getSomePattern",
+"MatchpatternGet", etc.
+
+The 'ignorecase' and 'smartcase' options do not apply, case is ignored if the
+pattern is all lower case.
+
+Vim's implementation is based on the algorithm from the fzy project:
+https://github.com/jhawthorn/fzy
+
+It uses dynamic programming to compute an optimal score for a given pattern
+and candidate.
+
+The algorithm works in two stages:
+
+1. Forward pass
+ Scan the candidate left to right, tracking the best score for each
+ pattern position. Matches score higher when they occur at the start
+ of the candidate, the start of a word (space, underscore, dash,
+ camelCase), or directly after the previous match.
+
+2. Backward pass
+ Start from the best-scoring end position and step back to find match
+ positions, ensuring the alignment is optimal.
+
+Vim extends the original algorithm to support multibyte codepoints, allowing
+correct matching for UTF-8 and other encodings.
+
+Time complexity is O(pattern * candidate). Memory usage is proportional
+to the same.
+
+The |matchfuzzy()| and |matchfuzzypos()| functions perform fuzzy searching in
+a List of strings. |matchfuzzy()| returns the matching strings, while
+|matchfuzzypos()| returns the matches along with their positions and scores.
The "f" flag of `:vimgrep` enables fuzzy matching.
diff --git a/runtime/doc/vimfn.txt b/runtime/doc/vimfn.txt
@@ -6520,9 +6520,6 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()*
given sequence.
limit Maximum number of matches in {list} to be
returned. Zero means no limit.
- camelcase Use enhanced camel case scoring making results
- better suited for completion related to
- programming languages. Defaults to v:true.
If {list} is a list of dictionaries, then the optional {dict}
argument supports the following additional items:
diff --git a/runtime/lua/vim/_meta/vimfn.lua b/runtime/lua/vim/_meta/vimfn.lua
@@ -5918,9 +5918,6 @@ function vim.fn.matchend(expr, pat, start, count) end
--- given sequence.
--- limit Maximum number of matches in {list} to be
--- returned. Zero means no limit.
---- camelcase Use enhanced camel case scoring making results
---- better suited for completion related to
---- programming languages. Defaults to v:true.
---
--- If {list} is a list of dictionaries, then the optional {dict}
--- argument supports the following additional items:
diff --git a/src/gen/gen_eval.lua b/src/gen/gen_eval.lua
@@ -13,6 +13,7 @@ local hashpipe = assert(io.open(funcsfname, 'wb'))
hashpipe:write([[
#include "nvim/arglist.h"
+#include "nvim/channel.h"
#include "nvim/cmdexpand.h"
#include "nvim/cmdhist.h"
#include "nvim/diff.h"
@@ -28,7 +29,10 @@ hashpipe:write([[
#include "nvim/ex_docmd.h"
#include "nvim/ex_getln.h"
#include "nvim/fold.h"
+#include "nvim/fuzzy.h"
#include "nvim/getchar.h"
+#include "nvim/indent.h"
+#include "nvim/indent_c.h"
#include "nvim/insexpand.h"
#include "nvim/mapping.h"
#include "nvim/match.h"
diff --git a/src/nvim/buffer.c b/src/nvim/buffer.c
@@ -58,6 +58,7 @@
#include "nvim/file_search.h"
#include "nvim/fileio.h"
#include "nvim/fold.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
@@ -100,7 +101,6 @@
#include "nvim/regexp_defs.h"
#include "nvim/runtime.h"
#include "nvim/runtime_defs.h"
-#include "nvim/search.h"
#include "nvim/spell.h"
#include "nvim/state_defs.h"
#include "nvim/statusline.h"
@@ -2477,12 +2477,12 @@ int ExpandBufnames(char *pat, int *num_file, char ***file, int options)
} else {
p = NULL;
// first try matching with the short file name
- if ((score = fuzzy_match_str(buf->b_sfname, pat)) != 0) {
+ if ((score = fuzzy_match_str(buf->b_sfname, pat)) != FUZZY_SCORE_NONE) {
p = buf->b_sfname;
}
if (p == NULL) {
// next try matching with the full path file name
- if ((score = fuzzy_match_str(buf->b_ffname, pat)) != 0) {
+ if ((score = fuzzy_match_str(buf->b_ffname, pat)) != FUZZY_SCORE_NONE) {
p = buf->b_ffname;
}
}
diff --git a/src/nvim/cmdexpand.c b/src/nvim/cmdexpand.c
@@ -29,6 +29,7 @@
#include "nvim/ex_cmds_defs.h"
#include "nvim/ex_docmd.h"
#include "nvim/ex_getln.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
@@ -3096,7 +3097,7 @@ void ExpandGeneric(const char *const pat, expand_T *xp, regmatch_T *regmatch, ch
match = vim_regexec(regmatch, str, 0);
} else {
score = fuzzy_match_str(str, pat);
- match = (score != 0);
+ match = (score != FUZZY_SCORE_NONE);
}
} else {
match = true;
@@ -3408,7 +3409,7 @@ static int ExpandUserDefined(const char *const pat, expand_T *xp, regmatch_T *re
match = vim_regexec(regmatch, s, 0);
} else {
score = fuzzy_match_str(s, pat);
- match = (score != 0);
+ match = (score != FUZZY_SCORE_NONE);
}
} else {
match = true; // match everything
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
@@ -7272,9 +7272,6 @@ M.funcs = {
given sequence.
limit Maximum number of matches in {list} to be
returned. Zero means no limit.
- camelcase Use enhanced camel case scoring making results
- better suited for completion related to
- programming languages. Defaults to v:true.
If {list} is a list of dictionaries, then the optional {dict}
argument supports the following additional items:
diff --git a/src/nvim/fuzzy.c b/src/nvim/fuzzy.c
@@ -0,0 +1,922 @@
+// fuzzy.c: fuzzy matching algorithm and related functions
+//
+// Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy)
+// Original code:
+// Copyright (c) 2014 John Hawthorn
+// Licensed under the MIT License.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nvim/ascii_defs.h"
+#include "nvim/charset.h"
+#include "nvim/errors.h"
+#include "nvim/eval.h"
+#include "nvim/eval/typval.h"
+#include "nvim/fuzzy.h"
+#include "nvim/garray.h"
+#include "nvim/garray_defs.h"
+#include "nvim/globals.h"
+#include "nvim/insexpand.h"
+#include "nvim/macros_defs.h"
+#include "nvim/mbyte.h"
+#include "nvim/memline.h"
+#include "nvim/memory.h"
+#include "nvim/message.h"
+
+typedef double score_t;
+
+#define SCORE_MAX INFINITY
+#define SCORE_MIN (-INFINITY)
+#define SCORE_SCALE 1000
+
+typedef struct {
+ int idx; ///< used for stable sort
+ listitem_T *item;
+ int score;
+ list_T *lmatchpos;
+ char *pat;
+ char *itemstr;
+ bool itemstr_allocated;
+ int startpos;
+} fuzzyItem_T;
+
+typedef struct match_struct match_struct;
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "fuzzy.c.generated.h"
+#endif
+
+/// fuzzy_match()
+///
+/// @return true if "pat_arg" matches "str". Also returns the match score in
+/// "outScore" and the matching character positions in "matches".
+bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
+ int *const outScore, uint32_t *const matches, const int maxMatches)
+ FUNC_ATTR_NONNULL_ALL
+{
+ bool complete = false;
+ int numMatches = 0;
+
+ *outScore = 0;
+
+ char *const save_pat = xstrdup(pat_arg);
+ char *pat = save_pat;
+ char *p = pat;
+
+ // Try matching each word in "pat_arg" in "str"
+ while (true) {
+ if (matchseq) {
+ complete = true;
+ } else {
+ // Extract one word from the pattern (separated by space)
+ p = skipwhite(p);
+ if (*p == NUL) {
+ break;
+ }
+ pat = p;
+ while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
+ MB_PTR_ADV(p);
+ }
+ if (*p == NUL) { // processed all the words
+ complete = true;
+ }
+ *p = NUL;
+ }
+
+ int score = FUZZY_SCORE_NONE;
+ if (has_match(pat, str)) {
+ score_t fzy_score = match_positions(pat, str, matches + numMatches);
+ score = (fzy_score == (score_t)SCORE_MIN
+ ? INT_MIN + 1
+ : (fzy_score == (score_t)SCORE_MAX
+ ? INT_MAX
+ : (fzy_score < 0
+ ? (int)ceil(fzy_score * SCORE_SCALE - 0.5)
+ : (int)floor(fzy_score * SCORE_SCALE + 0.5))));
+ }
+
+ if (score == FUZZY_SCORE_NONE) {
+ numMatches = 0;
+ *outScore = FUZZY_SCORE_NONE;
+ break;
+ }
+
+ if (score > 0 && *outScore > INT_MAX - score) {
+ *outScore = INT_MAX;
+ } else if (score < 0 && *outScore < INT_MIN + 1 - score) {
+ *outScore = INT_MIN + 1;
+ } else {
+ *outScore += score;
+ }
+
+ numMatches += mb_charlen(pat);
+
+ if (complete || numMatches >= maxMatches) {
+ break;
+ }
+
+ // try matching the next word
+ p++;
+ }
+
+ xfree(save_pat);
+ return numMatches != 0;
+}
+
+/// Sort the fuzzy matches in the descending order of the match score.
+/// For items with same score, retain the order using the index (stable sort)
+static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
+ FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
+{
+ const int v1 = ((const fuzzyItem_T *)s1)->score;
+ const int v2 = ((const fuzzyItem_T *)s2)->score;
+
+ if (v1 == v2) {
+ const char *const pat = ((const fuzzyItem_T *)s1)->pat;
+ const size_t patlen = strlen(pat);
+ int startpos = ((const fuzzyItem_T *)s1)->startpos;
+ const bool exact_match1 = startpos >= 0
+ && strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0;
+ startpos = ((const fuzzyItem_T *)s2)->startpos;
+ const bool exact_match2 = startpos >= 0
+ && strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0;
+
+ if (exact_match1 == exact_match2) {
+ const int idx1 = ((const fuzzyItem_T *)s1)->idx;
+ const int idx2 = ((const fuzzyItem_T *)s2)->idx;
+ return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
+ } else if (exact_match2) {
+ return 1;
+ }
+ return -1;
+ } else {
+ return v1 > v2 ? -1 : 1;
+ }
+}
+
+/// Fuzzy search the string "str" in a list of "items" and return the matching
+/// strings in "fmatchlist".
+/// If "matchseq" is true, then for multi-word search strings, match all the
+/// words in sequence.
+/// If "items" is a list of strings, then search for "str" in the list.
+/// If "items" is a list of dicts, then either use "key" to lookup the string
+/// for each item or use "item_cb" Funcref function to get the string.
+/// If "retmatchpos" is true, then return a list of positions where "str"
+/// matches for each item.
+static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
+ const char *const key, Callback *const item_cb,
+ const bool retmatchpos, list_T *const fmatchlist,
+ const int max_matches)
+ FUNC_ATTR_NONNULL_ARG(2, 5, 7)
+{
+ int len = tv_list_len(l);
+ if (len == 0) {
+ return;
+ }
+ if (max_matches > 0 && len > max_matches) {
+ len = max_matches;
+ }
+
+ fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
+ int match_count = 0;
+ uint32_t matches[FUZZY_MATCH_MAX_LEN];
+
+ // For all the string items in items, get the fuzzy matching score
+ TV_LIST_ITER(l, li, {
+ if (max_matches > 0 && match_count >= max_matches) {
+ break;
+ }
+
+ char *itemstr = NULL;
+ bool itemstr_allocate = false;
+ typval_T rettv;
+ rettv.v_type = VAR_UNKNOWN;
+ const typval_T *const tv = TV_LIST_ITEM_TV(li);
+ if (tv->v_type == VAR_STRING) { // list of strings
+ itemstr = tv->vval.v_string;
+ } else if (tv->v_type == VAR_DICT
+ && (key != NULL || item_cb->type != kCallbackNone)) {
+ // For a dict, either use the specified key to lookup the string or
+ // use the specified callback function to get the string.
+ if (key != NULL) {
+ itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
+ } else {
+ typval_T argv[2];
+
+ // Invoke the supplied callback (if any) to get the dict item
+ tv->vval.v_dict->dv_refcount++;
+ argv[0].v_type = VAR_DICT;
+ argv[0].vval.v_dict = tv->vval.v_dict;
+ argv[1].v_type = VAR_UNKNOWN;
+ if (callback_call(item_cb, 1, argv, &rettv)) {
+ if (rettv.v_type == VAR_STRING) {
+ itemstr = rettv.vval.v_string;
+ itemstr_allocate = true;
+ }
+ }
+ tv_dict_unref(tv->vval.v_dict);
+ }
+ }
+
+ int score;
+ if (itemstr != NULL
+ && fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) {
+ items[match_count].idx = (int)match_count;
+ items[match_count].item = li;
+ items[match_count].score = score;
+ items[match_count].pat = str;
+ items[match_count].startpos = (int)matches[0];
+ items[match_count].itemstr = itemstr_allocate ? xstrdup(itemstr) : itemstr;
+ items[match_count].itemstr_allocated = itemstr_allocate;
+
+ // Copy the list of matching positions in itemstr to a list, if
+ // "retmatchpos" is set.
+ if (retmatchpos) {
+ items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow);
+ int j = 0;
+ const char *p = str;
+ while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) {
+ if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
+ tv_list_append_number(items[match_count].lmatchpos, matches[j]);
+ j++;
+ }
+ MB_PTR_ADV(p);
+ }
+ }
+ match_count++;
+ }
+ tv_clear(&rettv);
+ });
+
+ if (match_count > 0) {
+ // Sort the list by the descending order of the match score
+ qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
+
+ // For matchfuzzy(), return a list of matched strings.
+ // ['str1', 'str2', 'str3']
+ // For matchfuzzypos(), return a list with three items.
+ // The first item is a list of matched strings. The second item
+ // is a list of lists where each list item is a list of matched
+ // character positions. The third item is a list of matching scores.
+ // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
+ list_T *retlist;
+ if (retmatchpos) {
+ const listitem_T *const li = tv_list_find(fmatchlist, 0);
+ assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
+ retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
+ } else {
+ retlist = fmatchlist;
+ }
+
+ // Copy the matching strings to the return list
+ for (int i = 0; i < match_count; i++) {
+ tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
+ }
+
+ // next copy the list of matching positions
+ if (retmatchpos) {
+ const listitem_T *li = tv_list_find(fmatchlist, -2);
+ assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
+ retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
+
+ for (int i = 0; i < match_count; i++) {
+ assert(items[i].lmatchpos != NULL);
+ tv_list_append_list(retlist, items[i].lmatchpos);
+ items[i].lmatchpos = NULL;
+ }
+
+ // copy the matching scores
+ li = tv_list_find(fmatchlist, -1);
+ assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
+ retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
+ for (int i = 0; i < match_count; i++) {
+ tv_list_append_number(retlist, items[i].score);
+ }
+ }
+ }
+
+ for (int i = 0; i < match_count; i++) {
+ if (items[i].itemstr_allocated) {
+ xfree(items[i].itemstr);
+ }
+ assert(items[i].lmatchpos == NULL);
+ }
+ xfree(items);
+}
+
+/// Do fuzzy matching. Returns the list of matched strings in "rettv".
+/// If "retmatchpos" is true, also returns the matching character positions.
+static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
+ const bool retmatchpos)
+ FUNC_ATTR_NONNULL_ALL
+{
+ // validate and get the arguments
+ if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+ semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
+ return;
+ }
+ if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
+ semsg(_(e_invarg2), tv_get_string(&argvars[1]));
+ return;
+ }
+
+ Callback cb = CALLBACK_NONE;
+ const char *key = NULL;
+ bool matchseq = false;
+ int max_matches = 0;
+ if (argvars[2].v_type != VAR_UNKNOWN) {
+ if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
+ return;
+ }
+
+ // To search a dict, either a callback function or a key can be
+ // specified.
+ dict_T *const d = argvars[2].vval.v_dict;
+ const dictitem_T *di;
+ if ((di = tv_dict_find(d, "key", -1)) != NULL) {
+ if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
+ || *di->di_tv.vval.v_string == NUL) {
+ semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
+ return;
+ }
+ key = tv_get_string(&di->di_tv);
+ } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
+ semsg(_(e_invargval), "text_cb");
+ return;
+ }
+
+ if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
+ if (di->di_tv.v_type != VAR_NUMBER) {
+ semsg(_(e_invargval), "limit");
+ return;
+ }
+ max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
+ }
+
+ if (tv_dict_has_key(d, "matchseq")) {
+ matchseq = true;
+ }
+ }
+
+ // get the fuzzy matches
+ tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
+ if (retmatchpos) {
+ // For matchfuzzypos(), a list with three items are returned. First
+ // item is a list of matching strings, the second item is a list of
+ // lists with matching positions within each string and the third item
+ // is the list of scores of the matches.
+ tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
+ tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
+ tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
+ }
+
+ fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]),
+ matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches);
+
+ callback_free(&cb);
+}
+
+/// "matchfuzzy()" function
+void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ do_fuzzymatch(argvars, rettv, false);
+}
+
+/// "matchfuzzypos()" function
+void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
+{
+ do_fuzzymatch(argvars, rettv, true);
+}
+
+/// Same as fuzzy_match_item_compare() except for use with a string match
+static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
+ FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
+{
+ const int v1 = ((fuzmatch_str_T *)s1)->score;
+ const int v2 = ((fuzmatch_str_T *)s2)->score;
+ const int idx1 = ((fuzmatch_str_T *)s1)->idx;
+ const int idx2 = ((fuzmatch_str_T *)s2)->idx;
+
+ if (v1 == v2) {
+ return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
+ } else {
+ return v1 > v2 ? -1 : 1;
+ }
+}
+
+/// Sort fuzzy matches by score
+static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
+ FUNC_ATTR_NONNULL_ALL
+{
+ // Sort the list by the descending order of the match score
+ qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
+}
+
+/// Same as fuzzy_match_item_compare() except for use with a function name
+/// string match. <SNR> functions should be sorted to the end.
+static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
+ FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
+{
+ const int v1 = ((fuzmatch_str_T *)s1)->score;
+ const int v2 = ((fuzmatch_str_T *)s2)->score;
+ const int idx1 = ((fuzmatch_str_T *)s1)->idx;
+ const int idx2 = ((fuzmatch_str_T *)s2)->idx;
+ const char *const str1 = ((fuzmatch_str_T *)s1)->str;
+ const char *const str2 = ((fuzmatch_str_T *)s2)->str;
+
+ if (*str1 != '<' && *str2 == '<') {
+ return -1;
+ }
+ if (*str1 == '<' && *str2 != '<') {
+ return 1;
+ }
+ if (v1 == v2) {
+ return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
+ }
+ return v1 > v2 ? -1 : 1;
+}
+
+/// Sort fuzzy matches of function names by score.
+/// <SNR> functions should be sorted to the end.
+static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
+ FUNC_ATTR_NONNULL_ALL
+{
+ // Sort the list by the descending order of the match score
+ qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
+}
+
+/// Fuzzy match "pat" in "str".
+/// @returns 0 if there is no match. Otherwise, returns the match score.
+int fuzzy_match_str(char *const str, const char *const pat)
+ FUNC_ATTR_WARN_UNUSED_RESULT
+{
+ if (str == NULL || pat == NULL) {
+ return 0;
+ }
+
+ int score = FUZZY_SCORE_NONE;
+ uint32_t matchpos[FUZZY_MATCH_MAX_LEN];
+ fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos));
+
+ return score;
+}
+
+/// Fuzzy match the position of string "pat" in string "str".
+/// @returns a dynamic array of matching positions. If there is no match, returns NULL.
+garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
+{
+ if (str == NULL || pat == NULL) {
+ return NULL;
+ }
+
+ garray_T *match_positions = xmalloc(sizeof(garray_T));
+ ga_init(match_positions, sizeof(uint32_t), 10);
+
+ int score = FUZZY_SCORE_NONE;
+ uint32_t matches[FUZZY_MATCH_MAX_LEN];
+ if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN)
+ || score == FUZZY_SCORE_NONE) {
+ ga_clear(match_positions);
+ xfree(match_positions);
+ return NULL;
+ }
+
+ int j = 0;
+ for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
+ if (!ascii_iswhite(utf_ptr2char(p))) {
+ GA_APPEND(uint32_t, match_positions, matches[j]);
+ j++;
+ }
+ }
+
+ return match_positions;
+}
+
+/// This function splits the line pointed to by `*ptr` into words and performs
+/// a fuzzy match for the pattern `pat` on each word. It iterates through the
+/// line, moving `*ptr` to the start of each word during the process.
+///
+/// If a match is found:
+/// - `*ptr` points to the start of the matched word.
+/// - `*len` is set to the length of the matched word.
+/// - `*score` contains the match score.
+///
+/// If no match is found, `*ptr` is updated to the end of the line.
+bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
+{
+ char *str = *ptr;
+ char *strBegin = str;
+ char *end = NULL;
+ char *start = NULL;
+ bool found = false;
+
+ if (str == NULL || pat == NULL) {
+ return found;
+ }
+ char *line_end = find_line_end(str);
+
+ while (str < line_end) {
+ // Skip non-word characters
+ start = find_word_start(str);
+ if (*start == NUL) {
+ break;
+ }
+ end = find_word_end(start);
+
+ // Extract the word from start to end
+ char save_end = *end;
+ *end = NUL;
+
+ // Perform fuzzy match
+ *score = fuzzy_match_str(start, pat);
+ *end = save_end;
+
+ if (*score != FUZZY_SCORE_NONE) {
+ *len = (int)(end - start);
+ found = true;
+ *ptr = start;
+ if (current_pos) {
+ current_pos->col += (int)(end - strBegin);
+ }
+ break;
+ }
+
+ // Move to the end of the current word for the next iteration
+ str = end;
+ // Ensure we continue searching after the current word
+ while (*str != NUL && !vim_iswordp(str)) {
+ MB_PTR_ADV(str);
+ }
+ }
+
+ if (!found) {
+ *ptr = line_end;
+ }
+
+ return found;
+}
+
+/// Search for the next fuzzy match in the specified buffer.
+/// This function attempts to find the next occurrence of the given pattern
+/// in the buffer, starting from the current position. It handles line wrapping
+/// and direction of search.
+///
+/// Return true if a match is found, otherwise false.
+bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
+ int *len, char **ptr, int *score)
+{
+ pos_T current_pos = *pos;
+ pos_T circly_end;
+ bool found_new_match = false;
+ bool looped_around = false;
+
+ bool whole_line = ctrl_x_mode_whole_line();
+
+ if (buf == curbuf) {
+ circly_end = *start_pos;
+ } else {
+ circly_end.lnum = buf->b_ml.ml_line_count;
+ circly_end.col = 0;
+ circly_end.coladd = 0;
+ }
+
+ if (whole_line && start_pos->lnum != pos->lnum) {
+ current_pos.lnum += dir;
+ }
+
+ while (true) {
+ // Check if looped around and back to start position
+ if (looped_around && equalpos(current_pos, circly_end)) {
+ break;
+ }
+
+ // Ensure current_pos is valid
+ if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
+ // Get the current line buffer
+ *ptr = ml_get_buf(buf, current_pos.lnum);
+ if (!whole_line) {
+ *ptr += current_pos.col;
+ }
+
+ // If ptr is end of line is reached, move to next line
+ // or previous line based on direction
+ if (*ptr != NULL && **ptr != NUL) {
+ if (!whole_line) {
+ // Try to find a fuzzy match in the current line starting
+ // from current position
+ found_new_match = fuzzy_match_str_in_line(ptr, pattern,
+ len, ¤t_pos, score);
+ if (found_new_match) {
+ *pos = current_pos;
+ break;
+ } else if (looped_around && current_pos.lnum == circly_end.lnum) {
+ break;
+ }
+ } else {
+ if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) {
+ found_new_match = true;
+ *pos = current_pos;
+ *len = ml_get_buf_len(buf, current_pos.lnum);
+ break;
+ }
+ }
+ }
+ }
+
+ // Move to the next line or previous line based on direction
+ if (dir == FORWARD) {
+ if (++current_pos.lnum > buf->b_ml.ml_line_count) {
+ if (p_ws) {
+ current_pos.lnum = 1;
+ looped_around = true;
+ } else {
+ break;
+ }
+ }
+ } else {
+ if (--current_pos.lnum < 1) {
+ if (p_ws) {
+ current_pos.lnum = buf->b_ml.ml_line_count;
+ looped_around = true;
+ } else {
+ break;
+ }
+ }
+ }
+ current_pos.col = 0;
+ }
+
+ return found_new_match;
+}
+
+/// Free an array of fuzzy string matches "fuzmatch[count]".
+void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
+{
+ if (fuzmatch == NULL) {
+ return;
+ }
+ for (int i = 0; i < count; i++) {
+ xfree(fuzmatch[count].str);
+ }
+ xfree(fuzmatch);
+}
+
+/// Copy a list of fuzzy matches into a string list after sorting the matches by
+/// the fuzzy score. Frees the memory allocated for "fuzmatch".
+void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
+ const int count, const bool funcsort)
+ FUNC_ATTR_NONNULL_ARG(2)
+{
+ if (count <= 0) {
+ return;
+ }
+
+ *matches = xmalloc((size_t)count * sizeof(char *));
+
+ // Sort the list by the descending order of the match score
+ if (funcsort) {
+ fuzzy_match_func_sort(fuzmatch, count);
+ } else {
+ fuzzy_match_str_sort(fuzmatch, count);
+ }
+
+ for (int i = 0; i < count; i++) {
+ (*matches)[i] = fuzmatch[i].str;
+ }
+ xfree(fuzmatch);
+}
+
+/// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy.
+/// This implementation extends the original by supporting multibyte characters.
+
+#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN
+
+#define SCORE_GAP_LEADING -0.005
+#define SCORE_GAP_TRAILING -0.005
+#define SCORE_GAP_INNER -0.01
+#define SCORE_MATCH_CONSECUTIVE 1.0
+#define SCORE_MATCH_SLASH 0.9
+#define SCORE_MATCH_WORD 0.8
+#define SCORE_MATCH_CAPITAL 0.7
+#define SCORE_MATCH_DOT 0.6
+
+static int has_match(const char *needle, const char *haystack)
+{
+ while (*needle != NUL) {
+ const int n_char = utf_ptr2char(needle);
+ const char *p = haystack;
+ bool matched = false;
+
+ while (*p != NUL) {
+ const int h_char = utf_ptr2char(p);
+
+ if (n_char == h_char || mb_toupper(n_char) == h_char) {
+ matched = true;
+ break;
+ }
+ p += utfc_ptr2len(p);
+ }
+
+ if (!matched) {
+ return 0;
+ }
+
+ needle += utfc_ptr2len(needle);
+ haystack = p + utfc_ptr2len(p);
+ }
+ return 1;
+}
+
+struct match_struct {
+ int needle_len;
+ int haystack_len;
+ int lower_needle[MATCH_MAX_LEN]; ///< stores codepoints
+ int lower_haystack[MATCH_MAX_LEN]; ///< stores codepoints
+ score_t match_bonus[MATCH_MAX_LEN];
+};
+
+#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ')
+#define IS_PATH_SEP(c) ((c) == '/')
+#define IS_DOT(c) ((c) == '.')
+
+static score_t compute_bonus_codepoint(int last_c, int c)
+{
+ if (ASCII_ISALNUM(c) || vim_iswordc(c)) {
+ if (IS_PATH_SEP(last_c)) {
+ return SCORE_MATCH_SLASH;
+ }
+ if (IS_WORD_SEP(last_c)) {
+ return SCORE_MATCH_WORD;
+ }
+ if (IS_DOT(last_c)) {
+ return SCORE_MATCH_DOT;
+ }
+ if (mb_isupper(c) && mb_islower(last_c)) {
+ return SCORE_MATCH_CAPITAL;
+ }
+ }
+ return 0;
+}
+
+static void setup_match_struct(match_struct *const match, const char *const needle,
+ const char *const haystack)
+{
+ int i = 0;
+ const char *p = needle;
+ while (*p != NUL && i < MATCH_MAX_LEN) {
+ const int c = utf_ptr2char(p);
+ match->lower_needle[i++] = mb_tolower(c);
+ MB_PTR_ADV(p);
+ }
+ match->needle_len = i;
+
+ i = 0;
+ p = haystack;
+ int prev_c = '/';
+ while (*p != NUL && i < MATCH_MAX_LEN) {
+ const int c = utf_ptr2char(p);
+ match->lower_haystack[i] = mb_tolower(c);
+ match->match_bonus[i] = compute_bonus_codepoint(prev_c, c);
+ prev_c = c;
+ MB_PTR_ADV(p);
+ i++;
+ }
+ match->haystack_len = i;
+}
+
+static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M,
+ const score_t *last_D, const score_t *last_M)
+{
+ int n = match->needle_len;
+ int m = match->haystack_len;
+ int i = row;
+
+ const int *lower_needle = match->lower_needle;
+ const int *lower_haystack = match->lower_haystack;
+ const score_t *match_bonus = match->match_bonus;
+
+ score_t prev_score = (score_t)SCORE_MIN;
+ score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
+
+ // These will not be used with this value, but not all compilers see it
+ score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN;
+
+ for (int j = 0; j < m; j++) {
+ if (lower_needle[i] == lower_haystack[j]) {
+ score_t score = (score_t)SCORE_MIN;
+ if (!i) {
+ score = (j * SCORE_GAP_LEADING) + match_bonus[j];
+ } else if (j) { // i > 0 && j > 0
+ score = MAX(prev_M + match_bonus[j],
+ // consecutive match, doesn't stack with match_bonus
+ prev_D + SCORE_MATCH_CONSECUTIVE);
+ }
+ prev_D = last_D[j];
+ prev_M = last_M[j];
+ curr_D[j] = score;
+ curr_M[j] = prev_score = MAX(score, prev_score + gap_score);
+ } else {
+ prev_D = last_D[j];
+ prev_M = last_M[j];
+ curr_D[j] = (score_t)SCORE_MIN;
+ curr_M[j] = prev_score = prev_score + gap_score;
+ }
+ }
+}
+
+static score_t match_positions(const char *const needle, const char *const haystack,
+ uint32_t *const positions)
+{
+ if (!*needle) {
+ return (score_t)SCORE_MIN;
+ }
+
+ match_struct match;
+ setup_match_struct(&match, needle, haystack);
+
+ int n = match.needle_len;
+ int m = match.haystack_len;
+
+ if (m > MATCH_MAX_LEN || n > m) {
+ // Unreasonably large candidate: return no score
+ // If it is a valid match it will still be returned, it will
+ // just be ranked below any reasonably sized candidates
+ return (score_t)SCORE_MIN;
+ } else if (n == m) {
+ // Since this method can only be called with a haystack which
+ // matches needle. If the lengths of the strings are equal the
+ // strings themselves must also be equal (ignoring case).
+ if (positions) {
+ for (int i = 0; i < n; i++) {
+ positions[i] = (uint32_t)i;
+ }
+ }
+ return (score_t)SCORE_MAX;
+ }
+
+ // D[][] Stores the best score for this position ending with a match.
+ // M[][] Stores the best possible score at this position.
+ score_t(*D)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n);
+ score_t(*M)[MATCH_MAX_LEN] = xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n);
+
+ match_row(&match, 0, D[0], M[0], D[0], M[0]);
+ for (int i = 1; i < n; i++) {
+ match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
+ }
+
+ // backtrace to find the positions of optimal matching
+ if (positions) {
+ int match_required = 0;
+ for (int i = n - 1, j = m - 1; i >= 0; i--) {
+ for (; j >= 0; j--) {
+ // There may be multiple paths which result in
+ // the optimal weight.
+ //
+ // For simplicity, we will pick the first one
+ // we encounter, the latest in the candidate
+ // string.
+ if (D[i][j] != (score_t)SCORE_MIN
+ && (match_required || D[i][j] == M[i][j])) {
+ // If this score was determined using
+ // SCORE_MATCH_CONSECUTIVE, the
+ // previous character MUST be a match
+ match_required = i && j
+ && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
+ positions[i] = (uint32_t)(j--);
+ break;
+ }
+ }
+ }
+ }
+
+ score_t result = M[n - 1][m - 1];
+
+ xfree(M);
+ xfree(D);
+
+ return result;
+}
diff --git a/src/nvim/fuzzy.h b/src/nvim/fuzzy.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <limits.h>
+#include <stdint.h> // IWYU pragma: keep
+
+#include "nvim/buffer_defs.h" // IWYU pragma: keep
+#include "nvim/eval/typval_defs.h" // IWYU pragma: keep
+#include "nvim/garray_defs.h" // IWYU pragma: keep
+#include "nvim/pos_defs.h" // IWYU pragma: keep
+#include "nvim/types_defs.h" // IWYU pragma: keep
+
+enum { FUZZY_MATCH_MAX_LEN = 1024, }; ///< max characters that can be matched
+enum { FUZZY_SCORE_NONE = INT_MIN, }; ///< invalid fuzzy score
+
+/// Fuzzy matched string list item. Used for fuzzy match completion. Items are
+/// usually sorted by "score". The "idx" member is used for stable-sort.
+typedef struct {
+ int idx;
+ char *str;
+ int score;
+} fuzmatch_str_T;
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "fuzzy.h.generated.h"
+#endif
diff --git a/src/nvim/insexpand.c b/src/nvim/insexpand.c
@@ -34,6 +34,7 @@
#include "nvim/extmark.h"
#include "nvim/extmark_defs.h"
#include "nvim/fileio.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
@@ -1000,7 +1001,7 @@ static int ins_compl_add(char *const str, int len, char *const fname, char *cons
// current match in the list of matches .
if (compl_first_match == NULL) {
match->cp_next = match->cp_prev = NULL;
- } else if (cfc_has_mode() && score > 0 && compl_get_longest) {
+ } else if (cfc_has_mode() && score != FUZZY_SCORE_NONE && compl_get_longest) {
compl_T *current = compl_first_match->cp_next;
compl_T *prev = compl_first_match;
inserted = false;
@@ -1188,7 +1189,8 @@ static void ins_compl_add_matches(int num_matches, char **matches, int icase)
for (int i = 0; i < num_matches && add_r != FAIL; i++) {
add_r = ins_compl_add(matches[i], -1, NULL, NULL, false, NULL, dir,
- CP_FAST | (icase ? CP_ICASE : 0), false, NULL, 0);
+ CP_FAST | (icase ? CP_ICASE : 0), false, NULL,
+ FUZZY_SCORE_NONE);
if (add_r == OK) {
// If dir was BACKWARD then honor it just once.
dir = FORWARD;
@@ -1356,7 +1358,7 @@ static int cp_compare_nearest(const void *a, const void *b)
{
int score_a = ((compl_T *)a)->cp_score;
int score_b = ((compl_T *)b)->cp_score;
- if (score_a == 0 || score_b == 0) {
+ if (score_a == FUZZY_SCORE_NONE || score_b == FUZZY_SCORE_NONE) {
return 0;
}
return (score_a > score_b) ? 1 : (score_a < score_b) ? -1 : 0;
@@ -1524,7 +1526,7 @@ static int ins_compl_build_pum(void)
if (!match_at_original_text(comp)
&& (leader->data == NULL
|| ins_compl_equal(comp, leader->data, leader->size)
- || (fuzzy_filter && comp->cp_score > 0))) {
+ || (fuzzy_filter && comp->cp_score != FUZZY_SCORE_NONE))) {
// Limit number of items from each source if max_items is set.
bool match_limit_exceeded = false;
int cur_source = comp->cp_cpt_source_idx;
@@ -1863,7 +1865,7 @@ static int thesaurus_add_words_in_line(char *fname, char **buf_arg, int dir, con
// Add the word. Skip the regexp match.
if (wstart != skip_word) {
status = ins_compl_add_infercase(wstart, (int)(ptr - wstart), p_ic,
- fname, dir, false, 0);
+ fname, dir, false, FUZZY_SCORE_NONE);
if (status == FAIL) {
break;
}
@@ -1909,7 +1911,8 @@ static void ins_compl_files(int count, char **files, bool thesaurus, int flags,
ptr = ctrl_x_mode_line_or_eval() ? find_line_end(ptr) : find_word_end(ptr);
int add_r = ins_compl_add_infercase(regmatch->startp[0],
(int)(ptr - regmatch->startp[0]),
- p_ic, files[i], *dir, false, 0);
+ p_ic, files[i], *dir, false,
+ FUZZY_SCORE_NONE);
if (thesaurus) {
// For a thesaurus, add all the words in the line
ptr = buf;
@@ -3218,7 +3221,7 @@ static int ins_compl_add_tv(typval_T *const tv, const Direction dir, bool fast)
return FAIL;
}
int status = ins_compl_add((char *)word, -1, NULL, cptext, true,
- &user_data, dir, flags, dup, user_hl, 0);
+ &user_data, dir, flags, dup, user_hl, FUZZY_SCORE_NONE);
if (status != OK) {
tv_clear(&user_data);
}
@@ -3314,7 +3317,7 @@ static void set_completion(colnr_T startcol, list_T *list)
}
if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size,
NULL, NULL, false, NULL, 0,
- flags | CP_FAST, false, NULL, 0) != OK) {
+ flags | CP_FAST, false, NULL, FUZZY_SCORE_NONE) != OK) {
return;
}
@@ -4072,7 +4075,7 @@ static void get_next_filename_completion(void)
for (int i = 0; i < num_matches; i++) {
char *ptr = matches[i];
int score = fuzzy_match_str(ptr, leader);
- if (score > 0) {
+ if (score != FUZZY_SCORE_NONE) {
GA_APPEND(int, &fuzzy_indices, i);
compl_fuzzy_scores[i] = score;
}
@@ -4245,7 +4248,7 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_
bool in_fuzzy_collect = (cfc_has_mode() && compl_length > 0)
|| ((get_cot_flags() & kOptCotFlagFuzzy) && compl_autocomplete);
char *leader = ins_compl_leader();
- int score = 0;
+ int score = FUZZY_SCORE_NONE;
const bool in_curbuf = st->ins_buf == curbuf;
// If 'infercase' is set, don't use 'smartcase' here
@@ -4343,7 +4346,6 @@ static int get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_
if (score < 0) {
score = -score;
}
- score++;
}
if (ins_compl_add_infercase(ptr, len, p_ic,
@@ -4401,7 +4403,8 @@ static void get_register_completion(void)
compl_orig_text.size) == 0
: strncmp(str, compl_orig_text.data,
compl_orig_text.size) == 0)) {
- if (ins_compl_add_infercase(str, str_len, p_ic, NULL, dir, false, 0) == OK) {
+ if (ins_compl_add_infercase(str, str_len, p_ic, NULL,
+ dir, false, FUZZY_SCORE_NONE) == OK) {
dir = FORWARD;
}
}
@@ -4435,7 +4438,7 @@ static void get_register_completion(void)
: strncmp(p, compl_orig_text.data,
compl_orig_text.size) == 0))) {
if (ins_compl_add_infercase(p, len, p_ic, NULL,
- dir, false, 0) == OK) {
+ dir, false, FUZZY_SCORE_NONE) == OK) {
dir = FORWARD;
}
}
@@ -4553,7 +4556,7 @@ static void get_next_bufname_token(void)
char *tail = path_tail(b->b_sfname);
if (strncmp(tail, compl_orig_text.data, compl_orig_text.size) == 0) {
ins_compl_add(tail, (int)strlen(tail), NULL, NULL, false, NULL, 0,
- p_ic ? CP_ICASE : 0, false, NULL, 0);
+ p_ic ? CP_ICASE : 0, false, NULL, FUZZY_SCORE_NONE);
}
}
}
@@ -4796,7 +4799,8 @@ static int ins_compl_get_exp(pos_T *ini)
// For `^P` completion, reset `compl_curr_match` to the head to avoid
// mixing matches from different sources.
if (!compl_dir_forward()) {
- while (compl_curr_match->cp_prev) {
+ while (compl_curr_match->cp_prev
+ && !match_at_original_text(compl_curr_match->cp_prev)) {
compl_curr_match = compl_curr_match->cp_prev;
}
}
@@ -5145,7 +5149,7 @@ static int find_next_completion_match(bool allow_get_expansion, int todo, bool a
if (!match_at_original_text(compl_shown_match)
&& leader->data != NULL
&& !ins_compl_equal(compl_shown_match, leader->data, leader->size)
- && !(compl_fuzzy_match && compl_shown_match->cp_score > 0)) {
+ && !(compl_fuzzy_match && compl_shown_match->cp_score != FUZZY_SCORE_NONE)) {
todo++;
} else {
// Remember a matching item.
@@ -5919,7 +5923,7 @@ static int ins_compl_start(void)
}
if (ins_compl_add(compl_orig_text.data, (int)compl_orig_text.size,
NULL, NULL, false, NULL, 0,
- flags, false, NULL, 0) != OK) {
+ flags, false, NULL, FUZZY_SCORE_NONE) != OK) {
API_CLEAR_STRING(compl_pattern);
API_CLEAR_STRING(compl_orig_text);
kv_destroy(compl_orig_extmarks);
diff --git a/src/nvim/mapping.c b/src/nvim/mapping.c
@@ -27,6 +27,7 @@
#include "nvim/eval/userfunc.h"
#include "nvim/ex_cmds_defs.h"
#include "nvim/ex_session.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
@@ -50,7 +51,6 @@
#include "nvim/regexp.h"
#include "nvim/regexp_defs.h"
#include "nvim/runtime.h"
-#include "nvim/search.h"
#include "nvim/state_defs.h"
#include "nvim/strings.h"
#include "nvim/types_defs.h"
@@ -1340,7 +1340,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat
match = vim_regexec(regmatch, p, 0);
} else {
score = fuzzy_match_str(p, pat);
- match = (score != 0);
+ match = (score != FUZZY_SCORE_NONE);
}
if (!match) {
@@ -1386,7 +1386,7 @@ int ExpandMappings(char *pat, regmatch_T *regmatch, int *numMatches, char ***mat
match = vim_regexec(regmatch, p, 0);
} else {
score = fuzzy_match_str(p, pat);
- match = (score != 0);
+ match = (score != FUZZY_SCORE_NONE);
}
if (!match) {
diff --git a/src/nvim/option.c b/src/nvim/option.c
@@ -56,6 +56,7 @@
#include "nvim/ex_getln.h"
#include "nvim/ex_session.h"
#include "nvim/fold.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/gettext_defs.h"
@@ -97,7 +98,6 @@
#include "nvim/regexp.h"
#include "nvim/regexp_defs.h"
#include "nvim/runtime.h"
-#include "nvim/search.h"
#include "nvim/spell.h"
#include "nvim/spellfile.h"
#include "nvim/spellsuggest.h"
@@ -5578,7 +5578,7 @@ static bool match_str(char *const str, regmatch_T *const regmatch, char **const
}
} else {
const int score = fuzzy_match_str(str, fuzzystr);
- if (score != 0) {
+ if (score != FUZZY_SCORE_NONE) {
if (!test_only) {
fuzmatch[idx].idx = idx;
fuzmatch[idx].str = xstrdup(str);
diff --git a/src/nvim/popupmenu.c b/src/nvim/popupmenu.c
@@ -21,8 +21,7 @@
#include "nvim/eval/typval.h"
#include "nvim/ex_cmds.h"
#include "nvim/ex_cmds_defs.h"
-#include "nvim/extmark.h"
-#include "nvim/extmark_defs.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
@@ -33,22 +32,18 @@
#include "nvim/highlight_defs.h"
#include "nvim/insexpand.h"
#include "nvim/keycodes.h"
-#include "nvim/mark.h"
#include "nvim/mbyte.h"
-#include "nvim/memline.h"
#include "nvim/memory.h"
#include "nvim/memory_defs.h"
#include "nvim/menu.h"
#include "nvim/message.h"
#include "nvim/move.h"
-#include "nvim/ops.h"
#include "nvim/option.h"
#include "nvim/option_defs.h"
#include "nvim/option_vars.h"
#include "nvim/plines.h"
#include "nvim/popupmenu.h"
#include "nvim/pos_defs.h"
-#include "nvim/search.h"
#include "nvim/state_defs.h"
#include "nvim/strings.h"
#include "nvim/types_defs.h"
diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c
@@ -35,6 +35,7 @@
#include "nvim/extmark.h"
#include "nvim/fileio.h"
#include "nvim/fold.h"
+#include "nvim/fuzzy.h"
#include "nvim/garray.h"
#include "nvim/garray_defs.h"
#include "nvim/gettext_defs.h"
@@ -5487,7 +5488,7 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp
{
bool found_match = false;
size_t pat_len = strlen(spat);
- pat_len = MIN(pat_len, MAX_FUZZY_MATCHES);
+ pat_len = MIN(pat_len, FUZZY_MATCH_MAX_LEN);
for (linenr_T lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; lnum++) {
colnr_T col = 0;
@@ -5533,12 +5534,12 @@ static bool vgr_match_buflines(qf_list_T *qfl, char *fname, buf_T *buf, char *sp
char *const str = ml_get_buf(buf, lnum);
const colnr_T linelen = ml_get_buf_len(buf, lnum);
int score;
- uint32_t matches[MAX_FUZZY_MATCHES];
+ uint32_t matches[FUZZY_MATCH_MAX_LEN];
const size_t sz = sizeof(matches) / sizeof(matches[0]);
// Fuzzy string match
CLEAR_FIELD(matches);
- while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz, true) > 0) {
+ while (fuzzy_match(str + col, spat, false, &score, matches, (int)sz) > 0) {
// Pass the buffer number so that it gets used even for a
// dummy buffer, unless duplicate_name is set, then the
// buffer will be wiped out below.
diff --git a/src/nvim/search.c b/src/nvim/search.c
@@ -29,8 +29,6 @@
#include "nvim/file_search.h"
#include "nvim/fileio.h"
#include "nvim/fold.h"
-#include "nvim/garray.h"
-#include "nvim/garray_defs.h"
#include "nvim/getchar.h"
#include "nvim/gettext_defs.h"
#include "nvim/globals.h"
@@ -2910,925 +2908,6 @@ the_end:
restore_incsearch_state();
}
-/// Fuzzy string matching
-///
-/// Ported from the lib_fts library authored by Forrest Smith.
-/// https://github.com/forrestthewoods/lib_fts/tree/master/code
-///
-/// The following blog describes the fuzzy matching algorithm:
-/// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
-///
-/// Each matching string is assigned a score. The following factors are checked:
-/// - Matched letter
-/// - Unmatched letter
-/// - Consecutively matched letters
-/// - Proximity to start
-/// - Letter following a separator (space, underscore)
-/// - Uppercase letter following lowercase (aka CamelCase)
-///
-/// Matched letters are good. Unmatched letters are bad. Matching near the start
-/// is good. Matching the first letter in the middle of a phrase is good.
-/// Matching the uppercase letters in camel case entries is good.
-///
-/// The score assigned for each factor is explained below.
-/// File paths are different from file names. File extensions may be ignorable.
-/// Single words care about consecutive matches but not separators or camel
-/// case.
-/// Score starts at 100
-/// Matched letter: +0 points
-/// Unmatched letter: -1 point
-/// Consecutive match bonus: +15 points
-/// First letter bonus: +15 points
-/// Separator bonus: +30 points
-/// Camel case bonus: +30 points
-/// Unmatched leading letter: -5 points (max: -15)
-///
-/// There is some nuance to this. Scores don’t have an intrinsic meaning. The
-/// score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a
-/// lower minimum score due to unmatched letter penalty. Longer search patterns
-/// have a higher maximum score due to match bonuses.
-///
-/// Separator and camel case bonus is worth a LOT. Consecutive matches are worth
-/// quite a bit.
-///
-/// There is a penalty if you DON’T match the first three letters. Which
-/// effectively rewards matching near the start. However there’s no difference
-/// in matching between the middle and end.
-///
-/// There is not an explicit bonus for an exact match. Unmatched letters receive
-/// a penalty. So shorter strings and closer matches are worth more.
-typedef struct {
- int idx; ///< used for stable sort
- listitem_T *item;
- int score;
- list_T *lmatchpos;
-} fuzzyItem_T;
-
-/// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that
-/// matching a whole word is preferred.
-#define SEQUENTIAL_BONUS 40
-/// bonus if match occurs after a path separator
-#define PATH_SEPARATOR_BONUS 30
-/// bonus if match occurs after a word separator
-#define WORD_SEPARATOR_BONUS 25
-/// bonus if match is uppercase and prev is lower
-#define CAMEL_BONUS 30
-/// bonus if the first letter is matched
-#define FIRST_LETTER_BONUS 15
-/// bonus if exact match
-#define EXACT_MATCH_BONUS 100
-/// bonus if case match when no ignorecase
-#define CASE_MATCH_BONUS 25
-/// penalty applied for every letter in str before the first match
-#define LEADING_LETTER_PENALTY (-5)
-/// maximum penalty for leading letters
-#define MAX_LEADING_LETTER_PENALTY (-15)
-/// penalty for every letter that doesn't match
-#define UNMATCHED_LETTER_PENALTY (-1)
-/// penalty for gap in matching positions (-2 * k)
-#define GAP_PENALTY (-2)
-/// Score for a string that doesn't fuzzy match the pattern
-#define SCORE_NONE (-9999)
-
-#define FUZZY_MATCH_RECURSION_LIMIT 10
-
-/// Compute a score for a fuzzy matched string. The matching character locations
-/// are in "matches".
-static int fuzzy_match_compute_score(const char *const fuzpat, const char *const str,
- const int strSz, const uint32_t *const matches,
- const int numMatches, bool camelcase)
- FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
-{
- assert(numMatches > 0); // suppress clang "result of operation is garbage"
- const char *p = str;
- uint32_t sidx = 0;
- bool is_exact_match = true;
- const char *const orig_fuzpat = fuzpat - numMatches;
- const char *curpat = orig_fuzpat;
- int pat_idx = 0;
- // Track consecutive camel case matches
- int consecutive_camel = 0;
-
- // Initialize score
- int score = 100;
-
- // Apply leading letter penalty
- int penalty = LEADING_LETTER_PENALTY * (int)matches[0];
- if (penalty < MAX_LEADING_LETTER_PENALTY) {
- penalty = MAX_LEADING_LETTER_PENALTY;
- }
- score += penalty;
-
- // Apply unmatched penalty
- const int unmatched = strSz - numMatches;
- score += UNMATCHED_LETTER_PENALTY * unmatched;
- // In a long string, not all matches may be found due to the recursion limit.
- // If at least one match is found, reset the score to a non-negative value.
- if (score < 0 && numMatches > 0) {
- score = 0;
- }
-
- // Apply ordering bonuses
- for (int i = 0; i < numMatches; i++) {
- const uint32_t currIdx = matches[i];
- bool is_camel = false;
-
- if (i > 0) {
- const uint32_t prevIdx = matches[i - 1];
-
- // Sequential
- if (currIdx == prevIdx + 1) {
- score += SEQUENTIAL_BONUS;
- } else {
- score += GAP_PENALTY * (int)(currIdx - prevIdx);
- // Reset consecutive camel count on gap
- consecutive_camel = 0;
- }
- }
-
- int curr;
- // Check for bonuses based on neighbor character value
- if (currIdx > 0) {
- // Camel case
- int neighbor = ' ';
-
- while (sidx < currIdx) {
- neighbor = utf_ptr2char(p);
- MB_PTR_ADV(p);
- sidx++;
- }
- curr = utf_ptr2char(p);
-
- // Enhanced camel case scoring
- if (camelcase && mb_islower(neighbor) && mb_isupper(curr)) {
- score += CAMEL_BONUS * 2; // Double the camel case bonus
- is_camel = true;
- consecutive_camel++;
- // Additional bonus for consecutive camel
- if (consecutive_camel > 1) {
- score += CAMEL_BONUS;
- }
- } else {
- consecutive_camel = 0;
- }
-
- // Bonus if the match follows a separator character
- if (neighbor == '/' || neighbor == '\\') {
- score += PATH_SEPARATOR_BONUS;
- } else if (neighbor == ' ' || neighbor == '_') {
- score += WORD_SEPARATOR_BONUS;
- }
- } else {
- // First letter
- score += FIRST_LETTER_BONUS;
- curr = utf_ptr2char(p);
- }
-
- // Case matching bonus
- if (mb_isalpha(curr)) {
- while (pat_idx < i && *curpat) {
- MB_PTR_ADV(curpat);
- pat_idx++;
- }
-
- if (curr == utf_ptr2char(curpat)) {
- score += CASE_MATCH_BONUS;
- // Extra bonus for exact case match in camel
- if (is_camel) {
- score += CASE_MATCH_BONUS / 2;
- }
- }
- }
-
- // Check exact match condition
- if (currIdx != (uint32_t)i) {
- is_exact_match = false;
- }
- }
-
- // Boost score for exact matches
- if (is_exact_match && numMatches == strSz) {
- score += EXACT_MATCH_BONUS;
- }
-
- return score;
-}
-
-/// Perform a recursive search for fuzzy matching "fuzpat" in "str".
-/// @return the number of matching characters.
-static int fuzzy_match_recursive(const char *fuzpat, const char *str, uint32_t strIdx,
- int *const outScore, const char *const strBegin, const int strLen,
- const uint32_t *const srcMatches, uint32_t *const matches,
- const int maxMatches, int nextMatch, int *const recursionCount,
- bool camelcase)
- FUNC_ATTR_NONNULL_ARG(1, 2, 4, 5, 8, 11) FUNC_ATTR_WARN_UNUSED_RESULT
-{
- // Recursion params
- bool recursiveMatch = false;
- uint32_t bestRecursiveMatches[MAX_FUZZY_MATCHES];
- int bestRecursiveScore = 0;
-
- // Count recursions
- (*recursionCount)++;
- if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) {
- return 0;
- }
-
- // Detect end of strings
- if (*fuzpat == NUL || *str == NUL) {
- return 0;
- }
-
- // Loop through fuzpat and str looking for a match
- bool first_match = true;
- while (*fuzpat != NUL && *str != NUL) {
- const int c1 = utf_ptr2char(fuzpat);
- const int c2 = utf_ptr2char(str);
-
- // Found match
- if (mb_tolower(c1) == mb_tolower(c2)) {
- // Supplied matches buffer was too short
- if (nextMatch >= maxMatches) {
- return 0;
- }
-
- int recursiveScore = 0;
- uint32_t recursiveMatches[MAX_FUZZY_MATCHES];
- CLEAR_FIELD(recursiveMatches);
-
- // "Copy-on-Write" srcMatches into matches
- if (first_match && srcMatches != NULL) {
- memcpy(matches, srcMatches, (size_t)nextMatch * sizeof(srcMatches[0]));
- first_match = false;
- }
-
- // Recursive call that "skips" this match
- const char *const next_char = str + utfc_ptr2len(str);
- if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, &recursiveScore, strBegin, strLen,
- matches, recursiveMatches,
- sizeof(recursiveMatches) / sizeof(recursiveMatches[0]), nextMatch,
- recursionCount, camelcase)) {
- // Pick best recursive score
- if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
- memcpy(bestRecursiveMatches, recursiveMatches,
- MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0]));
- bestRecursiveScore = recursiveScore;
- }
- recursiveMatch = true;
- }
-
- // Advance
- matches[nextMatch++] = strIdx;
- MB_PTR_ADV(fuzpat);
- }
- MB_PTR_ADV(str);
- strIdx++;
- }
-
- // Determine if full fuzpat was matched
- const bool matched = *fuzpat == NUL;
-
- // Calculate score
- if (matched) {
- *outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, nextMatch, camelcase);
- }
-
- // Return best result
- if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) {
- // Recursive score is better than "this"
- memcpy(matches, bestRecursiveMatches, (size_t)maxMatches * sizeof(matches[0]));
- *outScore = bestRecursiveScore;
- return nextMatch;
- } else if (matched) {
- return nextMatch; // "this" score is better than recursive
- }
-
- return 0; // no match
-}
-
-/// fuzzy_match()
-///
-/// Performs exhaustive search via recursion to find all possible matches and
-/// match with highest score.
-/// Scores values have no intrinsic meaning. Possible score range is not
-/// normalized and varies with pattern.
-/// Recursion is limited internally (default=10) to prevent degenerate cases
-/// (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
-/// Patterns are limited to MAX_FUZZY_MATCHES characters.
-///
-/// @return true if "pat_arg" matches "str". Also returns the match score in
-/// "outScore" and the matching character positions in "matches".
-bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
- int *const outScore, uint32_t *const matches, const int maxMatches, bool camelcase)
- FUNC_ATTR_NONNULL_ALL
-{
- const int len = mb_charlen(str);
- bool complete = false;
- int numMatches = 0;
-
- *outScore = 0;
-
- char *const save_pat = xstrdup(pat_arg);
- char *pat = save_pat;
- char *p = pat;
-
- // Try matching each word in "pat_arg" in "str"
- while (true) {
- if (matchseq) {
- complete = true;
- } else {
- // Extract one word from the pattern (separated by space)
- p = skipwhite(p);
- if (*p == NUL) {
- break;
- }
- pat = p;
- while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
- MB_PTR_ADV(p);
- }
- if (*p == NUL) { // processed all the words
- complete = true;
- }
- *p = NUL;
- }
-
- int score = 0;
- int recursionCount = 0;
- const int matchCount
- = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL,
- matches + numMatches,
- maxMatches - numMatches, 0, &recursionCount, camelcase);
- if (matchCount == 0) {
- numMatches = 0;
- break;
- }
-
- // Accumulate the match score and the number of matches
- *outScore += score;
- numMatches += matchCount;
-
- if (complete) {
- break;
- }
-
- // try matching the next word
- p++;
- }
-
- xfree(save_pat);
- return numMatches != 0;
-}
-
-/// Sort the fuzzy matches in the descending order of the match score.
-/// For items with same score, retain the order using the index (stable sort)
-static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
- FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
-{
- const int v1 = ((const fuzzyItem_T *)s1)->score;
- const int v2 = ((const fuzzyItem_T *)s2)->score;
- const int idx1 = ((const fuzzyItem_T *)s1)->idx;
- const int idx2 = ((const fuzzyItem_T *)s2)->idx;
-
- if (v1 == v2) {
- return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
- }
- return v1 > v2 ? -1 : 1;
-}
-
-/// Fuzzy search the string "str" in a list of "items" and return the matching
-/// strings in "fmatchlist".
-/// If "matchseq" is true, then for multi-word search strings, match all the
-/// words in sequence.
-/// If "items" is a list of strings, then search for "str" in the list.
-/// If "items" is a list of dicts, then either use "key" to lookup the string
-/// for each item or use "item_cb" Funcref function to get the string.
-/// If "retmatchpos" is true, then return a list of positions where "str"
-/// matches for each item.
-static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
- const char *const key, Callback *const item_cb,
- const bool retmatchpos, list_T *const fmatchlist,
- const int max_matches, bool camelcase)
- FUNC_ATTR_NONNULL_ARG(2, 5, 7)
-{
- int len = tv_list_len(l);
- if (len == 0) {
- return;
- }
- if (max_matches > 0 && len > max_matches) {
- len = max_matches;
- }
-
- fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
- int match_count = 0;
- uint32_t matches[MAX_FUZZY_MATCHES];
-
- // For all the string items in items, get the fuzzy matching score
- TV_LIST_ITER(l, li, {
- if (max_matches > 0 && match_count >= max_matches) {
- break;
- }
-
- char *itemstr = NULL;
- typval_T rettv;
- rettv.v_type = VAR_UNKNOWN;
- const typval_T *const tv = TV_LIST_ITEM_TV(li);
- if (tv->v_type == VAR_STRING) { // list of strings
- itemstr = tv->vval.v_string;
- } else if (tv->v_type == VAR_DICT && (key != NULL || item_cb->type != kCallbackNone)) {
- // For a dict, either use the specified key to lookup the string or
- // use the specified callback function to get the string.
- if (key != NULL) {
- itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
- } else {
- typval_T argv[2];
-
- // Invoke the supplied callback (if any) to get the dict item
- tv->vval.v_dict->dv_refcount++;
- argv[0].v_type = VAR_DICT;
- argv[0].vval.v_dict = tv->vval.v_dict;
- argv[1].v_type = VAR_UNKNOWN;
- if (callback_call(item_cb, 1, argv, &rettv)) {
- if (rettv.v_type == VAR_STRING) {
- itemstr = rettv.vval.v_string;
- }
- }
- tv_dict_unref(tv->vval.v_dict);
- }
- }
-
- int score;
- if (itemstr != NULL && fuzzy_match(itemstr, str, matchseq, &score, matches,
- MAX_FUZZY_MATCHES, camelcase)) {
- items[match_count].idx = (int)match_count;
- items[match_count].item = li;
- items[match_count].score = score;
-
- // Copy the list of matching positions in itemstr to a list, if
- // "retmatchpos" is set.
- if (retmatchpos) {
- items[match_count].lmatchpos = tv_list_alloc(kListLenMayKnow);
- int j = 0;
- const char *p = str;
- while (*p != NUL) {
- if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
- tv_list_append_number(items[match_count].lmatchpos, matches[j]);
- j++;
- }
- MB_PTR_ADV(p);
- }
- }
- match_count++;
- }
- tv_clear(&rettv);
- });
-
- if (match_count > 0) {
- // Sort the list by the descending order of the match score
- qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
-
- // For matchfuzzy(), return a list of matched strings.
- // ['str1', 'str2', 'str3']
- // For matchfuzzypos(), return a list with three items.
- // The first item is a list of matched strings. The second item
- // is a list of lists where each list item is a list of matched
- // character positions. The third item is a list of matching scores.
- // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
- list_T *retlist;
- if (retmatchpos) {
- const listitem_T *const li = tv_list_find(fmatchlist, 0);
- assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
- retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
- } else {
- retlist = fmatchlist;
- }
-
- // Copy the matching strings with a valid score to the return list
- for (int i = 0; i < match_count; i++) {
- if (items[i].score == SCORE_NONE) {
- break;
- }
- tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
- }
-
- // next copy the list of matching positions
- if (retmatchpos) {
- const listitem_T *li = tv_list_find(fmatchlist, -2);
- assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
- retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
-
- for (int i = 0; i < match_count; i++) {
- if (items[i].score == SCORE_NONE) {
- break;
- }
- tv_list_append_list(retlist, items[i].lmatchpos);
- }
-
- // copy the matching scores
- li = tv_list_find(fmatchlist, -1);
- assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
- retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
- for (int i = 0; i < match_count; i++) {
- if (items[i].score == SCORE_NONE) {
- break;
- }
- tv_list_append_number(retlist, items[i].score);
- }
- }
- }
- xfree(items);
-}
-
-/// Do fuzzy matching. Returns the list of matched strings in "rettv".
-/// If "retmatchpos" is true, also returns the matching character positions.
-static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
- const bool retmatchpos)
- FUNC_ATTR_NONNULL_ALL
-{
- // validate and get the arguments
- if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
- semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
- return;
- }
- if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
- semsg(_(e_invarg2), tv_get_string(&argvars[1]));
- return;
- }
-
- Callback cb = CALLBACK_NONE;
- const char *key = NULL;
- bool matchseq = false;
- int max_matches = 0;
- bool camelcase = true;
- if (argvars[2].v_type != VAR_UNKNOWN) {
- if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
- return;
- }
-
- // To search a dict, either a callback function or a key can be
- // specified.
- dict_T *const d = argvars[2].vval.v_dict;
- const dictitem_T *di;
- if ((di = tv_dict_find(d, "key", -1)) != NULL) {
- if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
- || *di->di_tv.vval.v_string == NUL) {
- semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
- return;
- }
- key = tv_get_string(&di->di_tv);
- } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
- semsg(_(e_invargval), "text_cb");
- return;
- }
-
- if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
- if (di->di_tv.v_type != VAR_NUMBER) {
- semsg(_(e_invargval), "limit");
- return;
- }
- max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
- }
-
- if ((di = tv_dict_find(d, "camelcase", -1)) != NULL) {
- if (di->di_tv.v_type != VAR_BOOL) {
- semsg(_(e_invargval), "camelcase");
- return;
- }
- camelcase = tv_get_bool_chk(&di->di_tv, NULL);
- }
-
- if (tv_dict_find(d, "matchseq", -1) != NULL) {
- matchseq = true;
- }
- }
-
- // get the fuzzy matches
- tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
- if (retmatchpos) {
- // For matchfuzzypos(), a list with three items are returned. First
- // item is a list of matching strings, the second item is a list of
- // lists with matching positions within each string and the third item
- // is the list of scores of the matches.
- tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
- tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
- tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
- }
-
- fuzzy_match_in_list(argvars[0].vval.v_list,
- (char *)tv_get_string(&argvars[1]), matchseq, key,
- &cb, retmatchpos, rettv->vval.v_list, max_matches, camelcase);
- callback_free(&cb);
-}
-
-/// "matchfuzzy()" function
-void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
-{
- do_fuzzymatch(argvars, rettv, false);
-}
-
-/// "matchfuzzypos()" function
-void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
-{
- do_fuzzymatch(argvars, rettv, true);
-}
-
-/// Same as fuzzy_match_item_compare() except for use with a string match
-static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
- FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
-{
- const int v1 = ((fuzmatch_str_T *)s1)->score;
- const int v2 = ((fuzmatch_str_T *)s2)->score;
- const int idx1 = ((fuzmatch_str_T *)s1)->idx;
- const int idx2 = ((fuzmatch_str_T *)s2)->idx;
-
- if (v1 == v2) {
- return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
- } else {
- return v1 > v2 ? -1 : 1;
- }
-}
-
-/// Sort fuzzy matches by score
-static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
- FUNC_ATTR_NONNULL_ALL
-{
- // Sort the list by the descending order of the match score
- qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
-}
-
-/// Same as fuzzy_match_item_compare() except for use with a function name
-/// string match. <SNR> functions should be sorted to the end.
-static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
- FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
-{
- const int v1 = ((fuzmatch_str_T *)s1)->score;
- const int v2 = ((fuzmatch_str_T *)s2)->score;
- const int idx1 = ((fuzmatch_str_T *)s1)->idx;
- const int idx2 = ((fuzmatch_str_T *)s2)->idx;
- const char *const str1 = ((fuzmatch_str_T *)s1)->str;
- const char *const str2 = ((fuzmatch_str_T *)s2)->str;
-
- if (*str1 != '<' && *str2 == '<') {
- return -1;
- }
- if (*str1 == '<' && *str2 != '<') {
- return 1;
- }
- if (v1 == v2) {
- return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
- }
- return v1 > v2 ? -1 : 1;
-}
-
-/// Sort fuzzy matches of function names by score.
-/// <SNR> functions should be sorted to the end.
-static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
- FUNC_ATTR_NONNULL_ALL
-{
- // Sort the list by the descending order of the match score
- qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
-}
-
-/// Fuzzy match "pat" in "str".
-/// @returns 0 if there is no match. Otherwise, returns the match score.
-int fuzzy_match_str(char *const str, const char *const pat)
- FUNC_ATTR_WARN_UNUSED_RESULT
-{
- if (str == NULL || pat == NULL) {
- return 0;
- }
-
- int score = 0;
- uint32_t matchpos[MAX_FUZZY_MATCHES];
- fuzzy_match(str, pat, true, &score, matchpos, sizeof(matchpos) / sizeof(matchpos[0]), true);
-
- return score;
-}
-
-/// Fuzzy match the position of string "pat" in string "str".
-/// @returns a dynamic array of matching positions. If there is no match, returns NULL.
-garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
-{
- if (str == NULL || pat == NULL) {
- return NULL;
- }
-
- garray_T *match_positions = xmalloc(sizeof(garray_T));
- ga_init(match_positions, sizeof(uint32_t), 10);
-
- unsigned matches[MAX_FUZZY_MATCHES];
- int score = 0;
- if (!fuzzy_match(str, pat, false, &score, matches, MAX_FUZZY_MATCHES, true)
- || score == 0) {
- ga_clear(match_positions);
- xfree(match_positions);
- return NULL;
- }
-
- int j = 0;
- for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
- if (!ascii_iswhite(utf_ptr2char(p))) {
- GA_APPEND(uint32_t, match_positions, matches[j]);
- j++;
- }
- }
-
- return match_positions;
-}
-
-/// This function splits the line pointed to by `*ptr` into words and performs
-/// a fuzzy match for the pattern `pat` on each word. It iterates through the
-/// line, moving `*ptr` to the start of each word during the process.
-///
-/// If a match is found:
-/// - `*ptr` points to the start of the matched word.
-/// - `*len` is set to the length of the matched word.
-/// - `*score` contains the match score.
-///
-/// If no match is found, `*ptr` is updated to the end of the line.
-bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
-{
- char *str = *ptr;
- char *strBegin = str;
- char *end = NULL;
- char *start = NULL;
- bool found = false;
-
- if (str == NULL || pat == NULL) {
- return found;
- }
- char *line_end = find_line_end(str);
-
- while (str < line_end) {
- // Skip non-word characters
- start = find_word_start(str);
- if (*start == NUL) {
- break;
- }
- end = find_word_end(start);
-
- // Extract the word from start to end
- char save_end = *end;
- *end = NUL;
-
- // Perform fuzzy match
- *score = fuzzy_match_str(start, pat);
- *end = save_end;
-
- if (*score > 0) {
- *len = (int)(end - start);
- found = true;
- *ptr = start;
- if (current_pos) {
- current_pos->col += (int)(end - strBegin);
- }
- break;
- }
-
- // Move to the end of the current word for the next iteration
- str = end;
- // Ensure we continue searching after the current word
- while (*str != NUL && !vim_iswordp(str)) {
- MB_PTR_ADV(str);
- }
- }
-
- if (!found) {
- *ptr = line_end;
- }
-
- return found;
-}
-
-/// Search for the next fuzzy match in the specified buffer.
-/// This function attempts to find the next occurrence of the given pattern
-/// in the buffer, starting from the current position. It handles line wrapping
-/// and direction of search.
-///
-/// Return true if a match is found, otherwise false.
-bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
- int *len, char **ptr, int *score)
-{
- pos_T current_pos = *pos;
- pos_T circly_end;
- bool found_new_match = false;
- bool looped_around = false;
-
- bool whole_line = ctrl_x_mode_whole_line();
-
- if (buf == curbuf) {
- circly_end = *start_pos;
- } else {
- circly_end.lnum = buf->b_ml.ml_line_count;
- circly_end.col = 0;
- circly_end.coladd = 0;
- }
-
- if (whole_line && start_pos->lnum != pos->lnum) {
- current_pos.lnum += dir;
- }
-
- while (true) {
- // Check if looped around and back to start position
- if (looped_around && equalpos(current_pos, circly_end)) {
- break;
- }
-
- // Ensure current_pos is valid
- if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
- // Get the current line buffer
- *ptr = ml_get_buf(buf, current_pos.lnum);
- if (!whole_line) {
- *ptr += current_pos.col;
- }
-
- // If ptr is end of line is reached, move to next line
- // or previous line based on direction
- if (*ptr != NULL && **ptr != NUL) {
- if (!whole_line) {
- // Try to find a fuzzy match in the current line starting
- // from current position
- found_new_match = fuzzy_match_str_in_line(ptr, pattern,
- len, ¤t_pos, score);
- if (found_new_match) {
- *pos = current_pos;
- break;
- } else if (looped_around && current_pos.lnum == circly_end.lnum) {
- break;
- }
- } else {
- if (fuzzy_match_str(*ptr, pattern) > 0) {
- found_new_match = true;
- *pos = current_pos;
- *len = ml_get_buf_len(buf, current_pos.lnum);
- break;
- }
- }
- }
- }
-
- // Move to the next line or previous line based on direction
- if (dir == FORWARD) {
- if (++current_pos.lnum > buf->b_ml.ml_line_count) {
- if (p_ws) {
- current_pos.lnum = 1;
- looped_around = true;
- } else {
- break;
- }
- }
- } else {
- if (--current_pos.lnum < 1) {
- if (p_ws) {
- current_pos.lnum = buf->b_ml.ml_line_count;
- looped_around = true;
- } else {
- break;
- }
- }
- }
- current_pos.col = 0;
- }
-
- return found_new_match;
-}
-
-/// Copy a list of fuzzy matches into a string list after sorting the matches by
-/// the fuzzy score. Frees the memory allocated for "fuzmatch".
-void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
- const int count, const bool funcsort)
- FUNC_ATTR_NONNULL_ARG(2)
-{
- if (count <= 0) {
- return;
- }
-
- *matches = xmalloc((size_t)count * sizeof(char *));
-
- // Sort the list by the descending order of the match score
- if (funcsort) {
- fuzzy_match_func_sort(fuzmatch, count);
- } else {
- fuzzy_match_str_sort(fuzmatch, count);
- }
-
- for (int i = 0; i < count; i++) {
- (*matches)[i] = fuzmatch[i].str;
- }
- xfree(fuzmatch);
-}
-
-/// Free a list of fuzzy string matches.
-void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
-{
- if (count <= 0 || fuzmatch == NULL) {
- return;
- }
- while (count--) {
- xfree(fuzmatch[count].str);
- }
- xfree(fuzmatch);
-}
-
/// Get line "lnum" and copy it into "buf[LSIZE]".
/// The copy is made because the regexp may make the line invalid when using a
/// mark.
diff --git a/src/nvim/search.h b/src/nvim/search.h
@@ -67,11 +67,6 @@ enum { SEARCH_STAT_DEF_TIMEOUT = 40, };
// '[>9999/>9999]': 13 + 1 (NUL)
enum { SEARCH_STAT_BUF_LEN = 16, };
-enum {
- /// Maximum number of characters that can be fuzzy matched
- MAX_FUZZY_MATCHES = 256,
-};
-
/// Structure containing offset definition for the last search pattern
///
/// @note Only offset for the last search pattern is used, not for the last
@@ -112,14 +107,6 @@ typedef struct {
int last_maxcount; // the max count of the last search
} searchstat_T;
-/// Fuzzy matched string list item. Used for fuzzy match completion. Items are
-/// usually sorted by "score". The "idx" member is used for stable-sort.
-typedef struct {
- int idx;
- char *str;
- int score;
-} fuzmatch_str_T;
-
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "search.h.generated.h"
#endif
diff --git a/test/old/testdir/test_cmdline.vim b/test/old/testdir/test_cmdline.vim
@@ -3262,7 +3262,7 @@ endfunc
func Test_fuzzy_completion_bufname_fullpath()
CheckUnix
set wildoptions&
- call mkdir('Xcmd/Xstate/Xfile.js', 'p')
+ call mkdir('Xcmd/Xstate/Xfile.js', 'pR')
edit Xcmd/Xstate/Xfile.js
cd Xcmd/Xstate
enew
@@ -3270,9 +3270,8 @@ func Test_fuzzy_completion_bufname_fullpath()
call assert_equal('"b CmdStateFile', @:)
set wildoptions=fuzzy
call feedkeys(":b CmdStateFile\<Tab>\<C-B>\"\<CR>", 'tx')
- call assert_match('Xcmd/Xstate/Xfile.js$', @:)
+ call assert_equal('"b CmdStateFile', @:)
cd -
- call delete('Xcmd', 'rf')
set wildoptions&
endfunc
@@ -3551,7 +3550,7 @@ func Test_fuzzy_completion_mapname()
nmap <Plug>state :
nmap <Plug>FendingOff :
call feedkeys(":nmap <Plug>fo\<C-A>\<C-B>\"\<CR>", 'tx')
- call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>goformat <Plug>fendoff", @:)
+ call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>fendoff <Plug>goformat", @:)
nunmap <Plug>format
nunmap <Plug>goformat
nunmap <Plug>TestFOrmat
@@ -3673,9 +3672,7 @@ func Test_fuzzy_completion_syntax_group()
call assert_equal('"syntax list mpar', @:)
set wildoptions=fuzzy
call feedkeys(":syntax list mpar\<Tab>\<C-B>\"\<CR>", 'tx')
- " Fuzzy match prefers NvimParenthesis over MatchParen
- " call assert_equal('"syntax list MatchParen', @:)
- call assert_equal('"syntax list NvimParenthesis', @:)
+ call assert_equal('"syntax list MatchParen', @:)
set wildoptions&
endfunc
@@ -3726,7 +3723,7 @@ func Test_fuzzy_completion_cmd_sort_results()
command T123FendingOff :
set wildoptions=fuzzy
call feedkeys(":T123fo\<C-A>\<C-B>\"\<CR>", 'tx')
- call assert_equal('"T123format T123TestFOrmat T123FendingOff T123goformat T123fendoff', @:)
+ call assert_equal('"T123format T123TestFOrmat T123FendingOff T123fendoff T123goformat', @:)
delcommand T123format
delcommand T123goformat
delcommand T123TestFOrmat
diff --git a/test/old/testdir/test_matchfuzzy.vim b/test/old/testdir/test_matchfuzzy.vim
@@ -18,11 +18,11 @@ func Test_matchfuzzy()
call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra'))
call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
- call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
- call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
+ call assert_equal(['oneTwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo'))
+ call assert_equal([], matchfuzzy(['onetwo', 'one_two'], 'oneTwo'))
call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len())
- call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
+ call assert_equal([repeat('a', 300)], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
" full match has highest score
call assert_equal(['Cursor', 'lCursor'], matchfuzzy(["hello", "lCursor", "Cursor"], "Cursor"))
" matches with same score should not be reordered
@@ -30,8 +30,7 @@ func Test_matchfuzzy()
call assert_equal(l, l->matchfuzzy('abc'))
" Tests for match preferences
- " preference for camel case match
- call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
+ call assert_equal(['onetwo', 'oneTwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
" preference for match after a separator (_ or space)
call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
" preference for leading letter match
@@ -47,7 +46,7 @@ func Test_matchfuzzy()
" gap penalty
call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab'))
" path separator vs word separator
- call assert_equal(['color/setup.vim', 'color\\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim'))
+ call assert_equal(['color/setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim', 'color\\setup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\\setup.vim'], 'setup.vim'))
" match multiple words (separated by space)
call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo'))
@@ -92,15 +91,16 @@ func Test_matchfuzzy()
call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:')
" camelcase
- call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'],
- \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
- call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'],
- \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true}))
call assert_equal(['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'],
- \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false}))
+ \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
call assert_equal(['things', 'sThings', 'thisThings'],
- \ matchfuzzy(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false}))
- call assert_fails("let x = matchfuzzy([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase')
+ \ matchfuzzy(['things','sThings', 'thisThings'], 'thin'))
+ call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc'))
+ call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc'))
+ call assert_equal(['MyTest'], matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT'))
+ call assert_equal(['CamelCaseMatchIngAlg'],
+ \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], 'CamelCase'))
+ call assert_equal(['SomeWord', 'PrefixSomeWord'], matchfuzzy(['PrefixSomeWord', 'SomeWord'], 'somewor'))
" Test in latin1 encoding
let save_enc = &encoding
@@ -112,50 +112,57 @@ endfunc
" Test for the matchfuzzypos() function
func Test_matchfuzzypos()
- call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl'))
- call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
+ call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'curl'], 'rl'))
+ call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
call assert_equal([['hello', 'hello world hello world'],
- \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]],
+ \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [2147483647, 4810]],
\ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
- call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa'))
- call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
- call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b'))
- call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a '))
+ call assert_equal([['aaaaaaa'], [[0, 1, 2]], [2880]], matchfuzzypos(['aaaaaaa'], 'aaa'))
+ call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b'))
+ call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b'))
+ call assert_equal([['a b'], [[0]], [885]], matchfuzzypos(['a b'], ' a '))
call assert_equal([[], [], []], matchfuzzypos(['a b'], ' '))
call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab'))
let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
call assert_equal(range(256), x[1][0])
- call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257)))
+
+ " fuzzy matches limited to 1024 chars
+ let matches = matchfuzzypos([repeat('a', 1030)], repeat('a', 1025))
+ call assert_equal([repeat('a', 1030)], matches[0])
+ call assert_equal(1024, len(matches[1][0]))
+ call assert_equal(1023, matches[1][0][1023])
+
call assert_equal([[], [], []], matchfuzzypos([], 'abc'))
+ call assert_equal([[], [], []], matchfuzzypos(['abc'], ''))
" match in a long string
- call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [155]],
+ call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [500]],
\ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc'))
" preference for camel case match
- call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc'))
+ call assert_equal([['xabcxxaBc'], [[7, 8]], [1665]], matchfuzzypos(['xabcxxaBc'], 'bc'))
" preference for match after a separator (_ or space)
- call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab'))
+ call assert_equal([['xabx_ab'], [[5, 6]], [1775]], matchfuzzypos(['xabx_ab'], 'ab'))
" preference for leading letter match
- call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab'))
+ call assert_equal([['abcxabc'], [[0, 1]], [1875]], matchfuzzypos(['abcxabc'], 'ab'))
" preference for sequential match
- call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one'))
+ call assert_equal([['aobncedone'], [[7, 8, 9]], [1965]], matchfuzzypos(['aobncedone'], 'one'))
" best recursive match
- call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one'))
+ call assert_equal([['xoone'], [[2, 3, 4]], [1990]], matchfuzzypos(['xoone'], 'one'))
" match multiple words (separated by space)
- call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
+ call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1}))
- call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
- call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
+ call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz'))
+ call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 9, 10]], [6660]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1}))
call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two'))
call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" \t "))
- call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace'))
+ call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [2147483647]], ['grace']->matchfuzzypos('race ace grace'))
let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
- call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
+ call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]],
\ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
- call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]],
+ call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]],
\ matchfuzzypos(l, 'cam', {'key' : 'val'}))
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}}))
call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'}))
@@ -175,30 +182,17 @@ func Test_matchfuzzypos()
" call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
" case match
- call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma'))
- call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma'))
- " CamelCase has high weight even case match
- call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc'))
- call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc'))
- call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT'))
- call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
- \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase'))
- call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'],
- \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM'))
+ call assert_equal([['Match'], [[0, 1]], [1885]], matchfuzzypos(['match', 'Match'], 'Ma'))
+ call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [1885, 1885]], matchfuzzypos(['Match', 'match'], 'ma'))
let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
" camelcase
- call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]],
+ call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [2885, 2870, 2865, 2680, 2670, 2655, 2655]],
\ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur'))
- call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]],
- \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true}))
- call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [267, 264, 263, 246, 239, 236, 231]],
- \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false}))
- call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 7]], [333, 287, 279]],
- \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false}))
- call assert_fails("let x = matchfuzzypos([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase')
+ call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 6, 7]], [3890, 3685, 3670]],
+ \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin'))
endfunc
" Test for matchfuzzy() with multibyte characters
@@ -216,9 +210,14 @@ func Test_matchfuzzy_mbyte()
call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의'))
call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘'))
- " preference for camel case match
+ " camel case match
+ call assert_equal(['oneąwo', 'oneĄwo'],
+ \ ['oneĄwo', 'oneąwo']->matchfuzzy('oneąwo'))
+ call assert_equal(['oneĄwo'],
+ \ ['oneĄwo', 'oneąwo']->matchfuzzy('Ąwo'))
+ " prefer camelcase when searched first character matches upper case
call assert_equal(['oneĄwo', 'oneąwo'],
- \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo'))
+ \ ['oneĄwo', 'oneąwo']->matchfuzzy('ąw'))
" preference for complete match then match after separator (_ or space)
call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']),
\ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ'))
@@ -242,41 +241,39 @@ endfunc
" Test for matchfuzzypos() with multibyte characters
func Test_matchfuzzypos_mbyte()
CheckFeature multi_lang
- call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]],
+ call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [4890]],
\ matchfuzzypos(['こんにちは世界'], 'こんにちは'))
- call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
+ call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [-20]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ'))
" reverse the order of characters
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ'))
- call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]],
+ call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [2885, 670]],
\ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ'))
call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
- \ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]],
+ \ [[0, 1], [0, 1], [0, 1], [0, 2]], [1880, 1865, 1850, 890]],
\ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
- call assert_equal([['ααααααα'], [[0, 1, 2]], [216]],
+ call assert_equal([['ααααααα'], [[0, 1, 2]], [2880]],
\ matchfuzzypos(['ααααααα'], 'ααα'))
call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl'))
let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256))
call assert_equal(range(256), x[1][0])
- call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257)))
+ call assert_equal([repeat('✓', 300)], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))[0])
" match multiple words (separated by space)
- call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의'))
+ call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [4515]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의'))
call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘'))
" match in a long string
- call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [105]],
+ call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [500]],
\ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ'))
- " preference for camel case match
- call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
" preference for match after a separator (_ or space)
- call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
+ call assert_equal([['xちだx_ちだ'], [[5, 6]], [1775]], matchfuzzypos(['xちだx_ちだ'], 'ちだ'))
" preference for leading letter match
- call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ'))
+ call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [1875]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ'))
" preference for sequential match
- call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ'))
+ call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [1965]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ'))
" best recursive match
- call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд'))
+ call assert_equal([['xффйд'], [[2, 3, 4]], [1990]], matchfuzzypos(['xффйд'], 'фйд'))
endfunc
" Test for matchfuzzy() with limit