neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

fuzzy.c (29250B)


      1 // fuzzy.c: fuzzy matching algorithm and related functions
      2 //
      3 // Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy)
      4 // Original code:
      5 //   Copyright (c) 2014 John Hawthorn
      6 //   Licensed under the MIT License.
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining a copy
      9 // of this software and associated documentation files (the "Software"), to deal
     10 // in the Software without restriction, including without limitation the rights
     11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12 // copies of the Software, and to permit persons to whom the Software is
     13 // furnished to do so, subject to the following conditions:
     14 //
     15 // The above copyright notice and this permission notice shall be included in
     16 // all copies or substantial portions of the Software.
     17 //
     18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     24 // THE SOFTWARE.
     25 
     26 #include <assert.h>
     27 #include <limits.h>
     28 #include <math.h>
     29 #include <stdbool.h>
     30 #include <stdint.h>
     31 #include <stdlib.h>
     32 #include <string.h>
     33 
     34 #include "nvim/ascii_defs.h"
     35 #include "nvim/charset.h"
     36 #include "nvim/errors.h"
     37 #include "nvim/eval.h"
     38 #include "nvim/eval/typval.h"
     39 #include "nvim/fuzzy.h"
     40 #include "nvim/garray.h"
     41 #include "nvim/garray_defs.h"
     42 #include "nvim/globals.h"
     43 #include "nvim/insexpand.h"
     44 #include "nvim/macros_defs.h"
     45 #include "nvim/mbyte.h"
     46 #include "nvim/memline.h"
     47 #include "nvim/memory.h"
     48 #include "nvim/message.h"
     49 
     50 typedef double score_t;
     51 
     52 #define SCORE_MAX INFINITY
     53 #define SCORE_MIN (-INFINITY)
     54 #define SCORE_SCALE 1000
     55 
     56 typedef struct {
     57  int idx;  ///< used for stable sort
     58  listitem_T *item;
     59  int score;
     60  list_T *lmatchpos;
     61  char *pat;
     62  char *itemstr;
     63  bool itemstr_allocated;
     64  int startpos;
     65 } fuzzyItem_T;
     66 
     67 typedef struct match_struct match_struct;
     68 
     69 #include "fuzzy.c.generated.h"
     70 
     71 /// fuzzy_match()
     72 ///
     73 /// @return true if "pat_arg" matches "str". Also returns the match score in
     74 /// "outScore" and the matching character positions in "matches".
     75 bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq,
     76                 int *const outScore, uint32_t *const matches, const int maxMatches)
     77  FUNC_ATTR_NONNULL_ALL
     78 {
     79  bool complete = false;
     80  int numMatches = 0;
     81 
     82  *outScore = 0;
     83 
     84  char *const save_pat = xstrdup(pat_arg);
     85  char *pat = save_pat;
     86  char *p = pat;
     87 
     88  // Try matching each word in "pat_arg" in "str"
     89  while (true) {
     90    if (matchseq) {
     91      complete = true;
     92    } else {
     93      // Extract one word from the pattern (separated by space)
     94      p = skipwhite(p);
     95      if (*p == NUL) {
     96        break;
     97      }
     98      pat = p;
     99      while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) {
    100        MB_PTR_ADV(p);
    101      }
    102      if (*p == NUL) {  // processed all the words
    103        complete = true;
    104      }
    105      *p = NUL;
    106    }
    107 
    108    int score = FUZZY_SCORE_NONE;
    109    if (has_match(pat, str)) {
    110      score_t fzy_score = match_positions(pat, str, matches + numMatches);
    111      score = (fzy_score == (score_t)SCORE_MIN
    112               ? INT_MIN + 1
    113               : (fzy_score == (score_t)SCORE_MAX
    114                  ? INT_MAX
    115                  : (fzy_score < 0
    116                     ? (int)ceil(fzy_score * SCORE_SCALE - 0.5)
    117                     : (int)floor(fzy_score * SCORE_SCALE + 0.5))));
    118    }
    119 
    120    if (score == FUZZY_SCORE_NONE) {
    121      numMatches = 0;
    122      *outScore = FUZZY_SCORE_NONE;
    123      break;
    124    }
    125 
    126    if (score > 0 && *outScore > INT_MAX - score) {
    127      *outScore = INT_MAX;
    128    } else if (score < 0 && *outScore < INT_MIN + 1 - score) {
    129      *outScore = INT_MIN + 1;
    130    } else {
    131      *outScore += score;
    132    }
    133 
    134    numMatches += mb_charlen(pat);
    135 
    136    if (complete || numMatches >= maxMatches) {
    137      break;
    138    }
    139 
    140    // try matching the next word
    141    p++;
    142  }
    143 
    144  xfree(save_pat);
    145  return numMatches != 0;
    146 }
    147 
    148 /// Sort the fuzzy matches in the descending order of the match score.
    149 /// For items with same score, retain the order using the index (stable sort)
    150 static int fuzzy_match_item_compare(const void *const s1, const void *const s2)
    151  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
    152 {
    153  const int v1 = ((const fuzzyItem_T *)s1)->score;
    154  const int v2 = ((const fuzzyItem_T *)s2)->score;
    155 
    156  if (v1 == v2) {
    157    const char *const pat = ((const fuzzyItem_T *)s1)->pat;
    158    const size_t patlen = strlen(pat);
    159    int startpos = ((const fuzzyItem_T *)s1)->startpos;
    160    const bool exact_match1 = startpos >= 0
    161                              && strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0;
    162    startpos = ((const fuzzyItem_T *)s2)->startpos;
    163    const bool exact_match2 = startpos >= 0
    164                              && strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0;
    165 
    166    if (exact_match1 == exact_match2) {
    167      const int idx1 = ((const fuzzyItem_T *)s1)->idx;
    168      const int idx2 = ((const fuzzyItem_T *)s2)->idx;
    169      return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
    170    } else if (exact_match2) {
    171      return 1;
    172    }
    173    return -1;
    174  } else {
    175    return v1 > v2 ? -1 : 1;
    176  }
    177 }
    178 
    179 /// Fuzzy search the string "str" in a list of "items" and return the matching
    180 /// strings in "fmatchlist".
    181 /// If "matchseq" is true, then for multi-word search strings, match all the
    182 /// words in sequence.
    183 /// If "items" is a list of strings, then search for "str" in the list.
    184 /// If "items" is a list of dicts, then either use "key" to lookup the string
    185 /// for each item or use "item_cb" Funcref function to get the string.
    186 /// If "retmatchpos" is true, then return a list of positions where "str"
    187 /// matches for each item.
    188 static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq,
    189                                const char *const key, Callback *const item_cb,
    190                                const bool retmatchpos, list_T *const fmatchlist,
    191                                const int max_matches)
    192  FUNC_ATTR_NONNULL_ARG(2, 5, 7)
    193 {
    194  int len = tv_list_len(l);
    195  if (len == 0) {
    196    return;
    197  }
    198  if (max_matches > 0 && len > max_matches) {
    199    len = max_matches;
    200  }
    201 
    202  fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T));
    203  int match_count = 0;
    204  uint32_t matches[FUZZY_MATCH_MAX_LEN];
    205 
    206  // For all the string items in items, get the fuzzy matching score
    207  TV_LIST_ITER(l, li, {
    208    if (max_matches > 0 && match_count >= max_matches) {
    209      break;
    210    }
    211 
    212    char *itemstr = NULL;
    213    bool itemstr_allocate = false;
    214    typval_T rettv;
    215 
    216    rettv.v_type = VAR_UNKNOWN;
    217    const typval_T *const tv = TV_LIST_ITEM_TV(li);
    218    if (tv->v_type == VAR_STRING) {  // list of strings
    219      itemstr = tv->vval.v_string;
    220    } else if (tv->v_type == VAR_DICT
    221               && (key != NULL || item_cb->type != kCallbackNone)) {
    222      // For a dict, either use the specified key to lookup the string or
    223      // use the specified callback function to get the string.
    224      if (key != NULL) {
    225        itemstr = tv_dict_get_string(tv->vval.v_dict, key, false);
    226      } else {
    227        typval_T argv[2];
    228 
    229        // Invoke the supplied callback (if any) to get the dict item
    230        tv->vval.v_dict->dv_refcount++;
    231        argv[0].v_type = VAR_DICT;
    232        argv[0].vval.v_dict = tv->vval.v_dict;
    233        argv[1].v_type = VAR_UNKNOWN;
    234        if (callback_call(item_cb, 1, argv, &rettv)) {
    235          if (rettv.v_type == VAR_STRING) {
    236            itemstr = rettv.vval.v_string;
    237            itemstr_allocate = true;
    238          }
    239        }
    240        tv_dict_unref(tv->vval.v_dict);
    241      }
    242    }
    243 
    244    int score;
    245    if (itemstr != NULL
    246        && fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) {
    247      char *itemstr_copy = itemstr_allocate ? xstrdup(itemstr) : itemstr;
    248      list_T *match_positions = NULL;
    249 
    250      // Copy the list of matching positions in itemstr to a list, if
    251      // "retmatchpos" is set.
    252      if (retmatchpos) {
    253        match_positions = tv_list_alloc(kListLenMayKnow);
    254        // Fill position information
    255        int j = 0;
    256        const char *p = str;
    257        while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) {
    258          if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) {
    259            tv_list_append_number(match_positions, matches[j]);
    260            j++;
    261          }
    262          MB_PTR_ADV(p);
    263        }
    264      }
    265      items[match_count].idx = match_count;
    266      items[match_count].item = li;
    267      items[match_count].score = score;
    268      items[match_count].pat = str;
    269      items[match_count].startpos = (int)matches[0];
    270      items[match_count].itemstr = itemstr_copy;
    271      items[match_count].itemstr_allocated = itemstr_allocate;
    272      items[match_count].lmatchpos = match_positions;
    273 
    274      match_count++;
    275    }
    276    tv_clear(&rettv);
    277  });
    278 
    279  if (match_count > 0) {
    280    // Sort the list by the descending order of the match score
    281    qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare);
    282 
    283    // For matchfuzzy(), return a list of matched strings.
    284    //          ['str1', 'str2', 'str3']
    285    // For matchfuzzypos(), return a list with three items.
    286    // The first item is a list of matched strings. The second item
    287    // is a list of lists where each list item is a list of matched
    288    // character positions. The third item is a list of matching scores.
    289    //      [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]]
    290    list_T *retlist;
    291    if (retmatchpos) {
    292      const listitem_T *const li = tv_list_find(fmatchlist, 0);
    293      assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
    294      retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
    295    } else {
    296      retlist = fmatchlist;
    297    }
    298 
    299    // Copy the matching strings to the return list
    300    for (int i = 0; i < match_count; i++) {
    301      tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item));
    302    }
    303 
    304    // next copy the list of matching positions
    305    if (retmatchpos) {
    306      const listitem_T *li = tv_list_find(fmatchlist, -2);
    307      assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
    308      retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
    309 
    310      for (int i = 0; i < match_count; i++) {
    311        assert(items[i].lmatchpos != NULL);
    312        tv_list_append_list(retlist, items[i].lmatchpos);
    313        items[i].lmatchpos = NULL;
    314      }
    315 
    316      // copy the matching scores
    317      li = tv_list_find(fmatchlist, -1);
    318      assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL);
    319      retlist = TV_LIST_ITEM_TV(li)->vval.v_list;
    320      for (int i = 0; i < match_count; i++) {
    321        tv_list_append_number(retlist, items[i].score);
    322      }
    323    }
    324  }
    325 
    326  for (int i = 0; i < match_count; i++) {
    327    if (items[i].itemstr_allocated) {
    328      xfree(items[i].itemstr);
    329    }
    330    assert(items[i].lmatchpos == NULL);
    331  }
    332  xfree(items);
    333 }
    334 
    335 /// Do fuzzy matching. Returns the list of matched strings in "rettv".
    336 /// If "retmatchpos" is true, also returns the matching character positions.
    337 static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv,
    338                          const bool retmatchpos)
    339  FUNC_ATTR_NONNULL_ALL
    340 {
    341  // validate and get the arguments
    342  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
    343    semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()");
    344    return;
    345  }
    346  if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) {
    347    semsg(_(e_invarg2), tv_get_string(&argvars[1]));
    348    return;
    349  }
    350 
    351  Callback cb = CALLBACK_NONE;
    352  const char *key = NULL;
    353  bool matchseq = false;
    354  int max_matches = 0;
    355  if (argvars[2].v_type != VAR_UNKNOWN) {
    356    if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) {
    357      return;
    358    }
    359 
    360    // To search a dict, either a callback function or a key can be
    361    // specified.
    362    dict_T *const d = argvars[2].vval.v_dict;
    363    const dictitem_T *di;
    364    if ((di = tv_dict_find(d, "key", -1)) != NULL) {
    365      if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL
    366          || *di->di_tv.vval.v_string == NUL) {
    367        semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv));
    368        return;
    369      }
    370      key = tv_get_string(&di->di_tv);
    371    } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) {
    372      semsg(_(e_invargval), "text_cb");
    373      return;
    374    }
    375 
    376    if ((di = tv_dict_find(d, "limit", -1)) != NULL) {
    377      if (di->di_tv.v_type != VAR_NUMBER) {
    378        semsg(_(e_invargval), "limit");
    379        return;
    380      }
    381      max_matches = (int)tv_get_number_chk(&di->di_tv, NULL);
    382    }
    383 
    384    if (tv_dict_has_key(d, "matchseq")) {
    385      matchseq = true;
    386    }
    387  }
    388 
    389  // get the fuzzy matches
    390  tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown);
    391  if (retmatchpos) {
    392    // For matchfuzzypos(), a list with three items are returned. First
    393    // item is a list of matching strings, the second item is a list of
    394    // lists with matching positions within each string and the third item
    395    // is the list of scores of the matches.
    396    tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
    397    tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
    398    tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown));
    399  }
    400 
    401  fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]),
    402                      matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches);
    403 
    404  callback_free(&cb);
    405 }
    406 
    407 /// "matchfuzzy()" function
    408 void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
    409 {
    410  do_fuzzymatch(argvars, rettv, false);
    411 }
    412 
    413 /// "matchfuzzypos()" function
    414 void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
    415 {
    416  do_fuzzymatch(argvars, rettv, true);
    417 }
    418 
    419 /// Same as fuzzy_match_item_compare() except for use with a string match
    420 static int fuzzy_match_str_compare(const void *const s1, const void *const s2)
    421  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
    422 {
    423  const int v1 = ((fuzmatch_str_T *)s1)->score;
    424  const int v2 = ((fuzmatch_str_T *)s2)->score;
    425  const int idx1 = ((fuzmatch_str_T *)s1)->idx;
    426  const int idx2 = ((fuzmatch_str_T *)s2)->idx;
    427 
    428  if (v1 == v2) {
    429    return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
    430  } else {
    431    return v1 > v2 ? -1 : 1;
    432  }
    433 }
    434 
    435 /// Sort fuzzy matches by score
    436 static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz)
    437  FUNC_ATTR_NONNULL_ALL
    438 {
    439  // Sort the list by the descending order of the match score
    440  qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare);
    441 }
    442 
    443 /// Same as fuzzy_match_item_compare() except for use with a function name
    444 /// string match. <SNR> functions should be sorted to the end.
    445 static int fuzzy_match_func_compare(const void *const s1, const void *const s2)
    446  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
    447 {
    448  const int v1 = ((fuzmatch_str_T *)s1)->score;
    449  const int v2 = ((fuzmatch_str_T *)s2)->score;
    450  const int idx1 = ((fuzmatch_str_T *)s1)->idx;
    451  const int idx2 = ((fuzmatch_str_T *)s2)->idx;
    452  const char *const str1 = ((fuzmatch_str_T *)s1)->str;
    453  const char *const str2 = ((fuzmatch_str_T *)s2)->str;
    454 
    455  if (*str1 != '<' && *str2 == '<') {
    456    return -1;
    457  }
    458  if (*str1 == '<' && *str2 != '<') {
    459    return 1;
    460  }
    461  if (v1 == v2) {
    462    return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1;
    463  }
    464  return v1 > v2 ? -1 : 1;
    465 }
    466 
    467 /// Sort fuzzy matches of function names by score.
    468 /// <SNR> functions should be sorted to the end.
    469 static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz)
    470  FUNC_ATTR_NONNULL_ALL
    471 {
    472  // Sort the list by the descending order of the match score
    473  qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare);
    474 }
    475 
    476 /// Fuzzy match "pat" in "str".
    477 /// @returns 0 if there is no match. Otherwise, returns the match score.
    478 int fuzzy_match_str(char *const str, const char *const pat)
    479  FUNC_ATTR_WARN_UNUSED_RESULT
    480 {
    481  if (str == NULL || pat == NULL) {
    482    return 0;
    483  }
    484 
    485  int score = FUZZY_SCORE_NONE;
    486  uint32_t matchpos[FUZZY_MATCH_MAX_LEN];
    487  fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos));
    488 
    489  return score;
    490 }
    491 
    492 /// Fuzzy match the position of string "pat" in string "str".
    493 /// @returns a dynamic array of matching positions. If there is no match, returns NULL.
    494 garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat)
    495 {
    496  if (str == NULL || pat == NULL) {
    497    return NULL;
    498  }
    499 
    500  garray_T *match_positions = xmalloc(sizeof(garray_T));
    501  ga_init(match_positions, sizeof(uint32_t), 10);
    502 
    503  int score = FUZZY_SCORE_NONE;
    504  uint32_t matches[FUZZY_MATCH_MAX_LEN];
    505  if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN)
    506      || score == FUZZY_SCORE_NONE) {
    507    ga_clear(match_positions);
    508    xfree(match_positions);
    509    return NULL;
    510  }
    511 
    512  int j = 0;
    513  for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) {
    514    if (!ascii_iswhite(utf_ptr2char(p))) {
    515      GA_APPEND(uint32_t, match_positions, matches[j]);
    516      j++;
    517    }
    518  }
    519 
    520  return match_positions;
    521 }
    522 
    523 /// This function splits the line pointed to by `*ptr` into words and performs
    524 /// a fuzzy match for the pattern `pat` on each word. It iterates through the
    525 /// line, moving `*ptr` to the start of each word during the process.
    526 ///
    527 /// If a match is found:
    528 /// - `*ptr` points to the start of the matched word.
    529 /// - `*len` is set to the length of the matched word.
    530 /// - `*score` contains the match score.
    531 ///
    532 /// If no match is found, `*ptr` is updated to the end of the line.
    533 bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score)
    534 {
    535  char *str = *ptr;
    536  char *strBegin = str;
    537  char *end = NULL;
    538  char *start = NULL;
    539  bool found = false;
    540 
    541  if (str == NULL || pat == NULL) {
    542    return found;
    543  }
    544  char *line_end = find_line_end(str);
    545 
    546  while (str < line_end) {
    547    // Skip non-word characters
    548    start = find_word_start(str);
    549    if (*start == NUL) {
    550      break;
    551    }
    552    end = find_word_end(start);
    553 
    554    // Extract the word from start to end
    555    char save_end = *end;
    556    *end = NUL;
    557 
    558    // Perform fuzzy match
    559    *score = fuzzy_match_str(start, pat);
    560    *end = save_end;
    561 
    562    if (*score != FUZZY_SCORE_NONE) {
    563      *len = (int)(end - start);
    564      found = true;
    565      *ptr = start;
    566      if (current_pos) {
    567        current_pos->col += (int)(end - strBegin);
    568      }
    569      break;
    570    }
    571 
    572    // Move to the end of the current word for the next iteration
    573    str = end;
    574    // Ensure we continue searching after the current word
    575    while (*str != NUL && !vim_iswordp(str)) {
    576      MB_PTR_ADV(str);
    577    }
    578  }
    579 
    580  if (!found) {
    581    *ptr = line_end;
    582  }
    583 
    584  return found;
    585 }
    586 
    587 /// Search for the next fuzzy match in the specified buffer.
    588 /// This function attempts to find the next occurrence of the given pattern
    589 /// in the buffer, starting from the current position. It handles line wrapping
    590 /// and direction of search.
    591 ///
    592 /// Return true if a match is found, otherwise false.
    593 bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos,
    594                            int *len, char **ptr, int *score)
    595 {
    596  pos_T current_pos = *pos;
    597  pos_T circly_end;
    598  bool found_new_match = false;
    599  bool looped_around = false;
    600 
    601  bool whole_line = ctrl_x_mode_whole_line();
    602 
    603  if (buf == curbuf) {
    604    circly_end = *start_pos;
    605  } else {
    606    circly_end.lnum = buf->b_ml.ml_line_count;
    607    circly_end.col = 0;
    608    circly_end.coladd = 0;
    609  }
    610 
    611  if (whole_line && start_pos->lnum != pos->lnum) {
    612    current_pos.lnum += dir;
    613  }
    614 
    615  while (true) {
    616    // Check if looped around and back to start position
    617    if (looped_around && (whole_line ? current_pos.lnum == circly_end.lnum
    618                                     : equalpos(current_pos, circly_end))) {
    619      break;
    620    }
    621 
    622    // Ensure current_pos is valid
    623    if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) {
    624      // Get the current line buffer
    625      *ptr = ml_get_buf(buf, current_pos.lnum);
    626      if (!whole_line) {
    627        *ptr += current_pos.col;
    628      }
    629 
    630      // If ptr is end of line is reached, move to next line
    631      // or previous line based on direction
    632      if (*ptr != NULL && **ptr != NUL) {
    633        if (!whole_line) {
    634          // Try to find a fuzzy match in the current line starting
    635          // from current position
    636          found_new_match = fuzzy_match_str_in_line(ptr, pattern,
    637                                                    len, &current_pos, score);
    638          if (found_new_match) {
    639            *pos = current_pos;
    640            break;
    641          } else if (looped_around && current_pos.lnum == circly_end.lnum) {
    642            break;
    643          }
    644        } else {
    645          if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) {
    646            found_new_match = true;
    647            *pos = current_pos;
    648            *len = ml_get_buf_len(buf, current_pos.lnum);
    649            break;
    650          }
    651        }
    652      }
    653    }
    654 
    655    // Move to the next line or previous line based on direction
    656    if (dir == FORWARD) {
    657      if (++current_pos.lnum > buf->b_ml.ml_line_count) {
    658        if (p_ws) {
    659          current_pos.lnum = 1;
    660          looped_around = true;
    661        } else {
    662          break;
    663        }
    664      }
    665    } else {
    666      if (--current_pos.lnum < 1) {
    667        if (p_ws) {
    668          current_pos.lnum = buf->b_ml.ml_line_count;
    669          looped_around = true;
    670        } else {
    671          break;
    672        }
    673      }
    674    }
    675    current_pos.col = 0;
    676  }
    677 
    678  return found_new_match;
    679 }
    680 
    681 /// Free an array of fuzzy string matches "fuzmatch[count]".
    682 void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count)
    683 {
    684  if (fuzmatch == NULL) {
    685    return;
    686  }
    687  for (int i = 0; i < count; i++) {
    688    xfree(fuzmatch[count].str);
    689  }
    690  xfree(fuzmatch);
    691 }
    692 
    693 /// Copy a list of fuzzy matches into a string list after sorting the matches by
    694 /// the fuzzy score. Frees the memory allocated for "fuzmatch".
    695 void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches,
    696                                const int count, const bool funcsort)
    697  FUNC_ATTR_NONNULL_ARG(2)
    698 {
    699  if (count <= 0) {
    700    goto theend;
    701  }
    702 
    703  *matches = xmalloc((size_t)count * sizeof(char *));
    704 
    705  // Sort the list by the descending order of the match score
    706  if (funcsort) {
    707    fuzzy_match_func_sort(fuzmatch, count);
    708  } else {
    709    fuzzy_match_str_sort(fuzmatch, count);
    710  }
    711 
    712  for (int i = 0; i < count; i++) {
    713    (*matches)[i] = fuzmatch[i].str;
    714  }
    715 
    716 theend:
    717  xfree(fuzmatch);
    718 }
    719 
    720 /// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy.
    721 /// This implementation extends the original by supporting multibyte characters.
    722 
    723 #define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN
    724 
    725 #define SCORE_GAP_LEADING -0.005
    726 #define SCORE_GAP_TRAILING -0.005
    727 #define SCORE_GAP_INNER -0.01
    728 #define SCORE_MATCH_CONSECUTIVE 1.0
    729 #define SCORE_MATCH_SLASH 0.9
    730 #define SCORE_MATCH_WORD 0.8
    731 #define SCORE_MATCH_CAPITAL 0.7
    732 #define SCORE_MATCH_DOT 0.6
    733 
    734 static int has_match(const char *const needle, const char *const haystack)
    735 {
    736  if (!needle || !haystack || !*needle) {
    737    return FAIL;
    738  }
    739 
    740  const char *n_ptr = needle;
    741  const char *h_ptr = haystack;
    742 
    743  while (*n_ptr) {
    744    const int n_char = utf_ptr2char(n_ptr);
    745    bool found = false;
    746 
    747    while (*h_ptr) {
    748      const int h_char = utf_ptr2char(h_ptr);
    749      if (n_char == h_char || mb_toupper(n_char) == h_char) {
    750        found = true;
    751        h_ptr += utfc_ptr2len(h_ptr);
    752        break;
    753      }
    754      h_ptr += utfc_ptr2len(h_ptr);
    755    }
    756 
    757    if (!found) {
    758      return FAIL;
    759    }
    760 
    761    n_ptr += utfc_ptr2len(n_ptr);
    762  }
    763 
    764  return OK;
    765 }
    766 
    767 struct match_struct {
    768  int needle_len;
    769  int haystack_len;
    770  int lower_needle[MATCH_MAX_LEN];    ///< stores codepoints
    771  int lower_haystack[MATCH_MAX_LEN];  ///< stores codepoints
    772  score_t match_bonus[MATCH_MAX_LEN];
    773 };
    774 
    775 #define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ')
    776 #define IS_PATH_SEP(c) ((c) == '/')
    777 #define IS_DOT(c)      ((c) == '.')
    778 
    779 static score_t compute_bonus_codepoint(int last_c, int c)
    780 {
    781  if (ASCII_ISALNUM(c) || vim_iswordc(c)) {
    782    if (IS_PATH_SEP(last_c)) {
    783      return SCORE_MATCH_SLASH;
    784    }
    785    if (IS_WORD_SEP(last_c)) {
    786      return SCORE_MATCH_WORD;
    787    }
    788    if (IS_DOT(last_c)) {
    789      return SCORE_MATCH_DOT;
    790    }
    791    if (mb_isupper(c) && mb_islower(last_c)) {
    792      return SCORE_MATCH_CAPITAL;
    793    }
    794  }
    795  return 0;
    796 }
    797 
    798 static void setup_match_struct(match_struct *const match, const char *const needle,
    799                               const char *const haystack)
    800 {
    801  int i = 0;
    802  const char *p = needle;
    803  while (*p != NUL && i < MATCH_MAX_LEN) {
    804    const int c = utf_ptr2char(p);
    805    match->lower_needle[i++] = mb_tolower(c);
    806    MB_PTR_ADV(p);
    807  }
    808  match->needle_len = i;
    809 
    810  i = 0;
    811  p = haystack;
    812  int prev_c = '/';
    813  while (*p != NUL && i < MATCH_MAX_LEN) {
    814    const int c = utf_ptr2char(p);
    815    match->lower_haystack[i] = mb_tolower(c);
    816    match->match_bonus[i] = compute_bonus_codepoint(prev_c, c);
    817    prev_c = c;
    818    MB_PTR_ADV(p);
    819    i++;
    820  }
    821  match->haystack_len = i;
    822 }
    823 
    824 static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M,
    825                             const score_t *last_D, const score_t *last_M)
    826 {
    827  int n = match->needle_len;
    828  int m = match->haystack_len;
    829  int i = row;
    830 
    831  const int *lower_needle = match->lower_needle;
    832  const int *lower_haystack = match->lower_haystack;
    833  const score_t *match_bonus = match->match_bonus;
    834 
    835  score_t prev_score = (score_t)SCORE_MIN;
    836  score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
    837 
    838  // These will not be used with this value, but not all compilers see it
    839  score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN;
    840 
    841  for (int j = 0; j < m; j++) {
    842    if (lower_needle[i] == lower_haystack[j]) {
    843      score_t score = (score_t)SCORE_MIN;
    844      if (!i) {
    845        score = (j * SCORE_GAP_LEADING) + match_bonus[j];
    846      } else if (j) {  // i > 0 && j > 0
    847        score = MAX(prev_M + match_bonus[j],
    848                    // consecutive match, doesn't stack with match_bonus
    849                    prev_D + SCORE_MATCH_CONSECUTIVE);
    850      }
    851      prev_D = last_D[j];
    852      prev_M = last_M[j];
    853      curr_D[j] = score;
    854      curr_M[j] = prev_score = MAX(score, prev_score + gap_score);
    855    } else {
    856      prev_D = last_D[j];
    857      prev_M = last_M[j];
    858      curr_D[j] = (score_t)SCORE_MIN;
    859      curr_M[j] = prev_score = prev_score + gap_score;
    860    }
    861  }
    862 }
    863 
    864 static score_t match_positions(const char *const needle, const char *const haystack,
    865                               uint32_t *const positions)
    866 {
    867  if (!needle || !haystack || !*needle) {
    868    return (score_t)SCORE_MIN;
    869  }
    870 
    871  match_struct match;
    872  setup_match_struct(&match, needle, haystack);
    873 
    874  int n = match.needle_len;
    875  int m = match.haystack_len;
    876 
    877  if (m > MATCH_MAX_LEN || n > m) {
    878    // Unreasonably large candidate: return no score
    879    // If it is a valid match it will still be returned, it will
    880    // just be ranked below any reasonably sized candidates
    881    return (score_t)SCORE_MIN;
    882  } else if (n == m) {
    883    // Since this method can only be called with a haystack which
    884    // matches needle. If the lengths of the strings are equal the
    885    // strings themselves must also be equal (ignoring case).
    886    if (positions) {
    887      for (int i = 0; i < n; i++) {
    888        positions[i] = (uint32_t)i;
    889      }
    890    }
    891    return (score_t)SCORE_MAX;
    892  }
    893 
    894  // ensure n * MATCH_MAX_LEN * 2 won't overflow
    895  if ((size_t)n > (SIZE_MAX / sizeof(score_t)) / MATCH_MAX_LEN / 2) {
    896    return (score_t)SCORE_MIN;
    897  }
    898 
    899  // Allocate for both D and M matrices in one contiguous block
    900  score_t *block = (score_t *)xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n * 2);
    901 
    902  // D[][] Stores the best score for this position ending with a match.
    903  // M[][] Stores the best possible score at this position.
    904  score_t(*D)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block);
    905  score_t(*M)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block
    906                                                           + MATCH_MAX_LEN * (size_t)n);
    907 
    908  match_row(&match, 0, D[0], M[0], D[0], M[0]);
    909  for (int i = 1; i < n; i++) {
    910    match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]);
    911  }
    912 
    913  // backtrace to find the positions of optimal matching
    914  if (positions) {
    915    int match_required = 0;
    916    for (int i = n - 1, j = m - 1; i >= 0; i--) {
    917      for (; j >= 0; j--) {
    918        // There may be multiple paths which result in
    919        // the optimal weight.
    920        //
    921        // For simplicity, we will pick the first one
    922        // we encounter, the latest in the candidate
    923        // string.
    924        if (D[i][j] != (score_t)SCORE_MIN
    925            && (match_required || D[i][j] == M[i][j])) {
    926          // If this score was determined using
    927          // SCORE_MATCH_CONSECUTIVE, the
    928          // previous character MUST be a match
    929          match_required = i && j
    930                           && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE;
    931          positions[i] = (uint32_t)(j--);
    932          break;
    933        }
    934      }
    935    }
    936  }
    937 
    938  score_t result = M[n - 1][m - 1];
    939 
    940  xfree(block);
    941  return result;
    942 }