fuzzy.c (29250B)
1 // fuzzy.c: fuzzy matching algorithm and related functions 2 // 3 // Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy) 4 // Original code: 5 // Copyright (c) 2014 John Hawthorn 6 // Licensed under the MIT License. 7 // 8 // Permission is hereby granted, free of charge, to any person obtaining a copy 9 // of this software and associated documentation files (the "Software"), to deal 10 // in the Software without restriction, including without limitation the rights 11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 // copies of the Software, and to permit persons to whom the Software is 13 // furnished to do so, subject to the following conditions: 14 // 15 // The above copyright notice and this permission notice shall be included in 16 // all copies or substantial portions of the Software. 17 // 18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 // THE SOFTWARE. 25 26 #include <assert.h> 27 #include <limits.h> 28 #include <math.h> 29 #include <stdbool.h> 30 #include <stdint.h> 31 #include <stdlib.h> 32 #include <string.h> 33 34 #include "nvim/ascii_defs.h" 35 #include "nvim/charset.h" 36 #include "nvim/errors.h" 37 #include "nvim/eval.h" 38 #include "nvim/eval/typval.h" 39 #include "nvim/fuzzy.h" 40 #include "nvim/garray.h" 41 #include "nvim/garray_defs.h" 42 #include "nvim/globals.h" 43 #include "nvim/insexpand.h" 44 #include "nvim/macros_defs.h" 45 #include "nvim/mbyte.h" 46 #include "nvim/memline.h" 47 #include "nvim/memory.h" 48 #include "nvim/message.h" 49 50 typedef double score_t; 51 52 #define SCORE_MAX INFINITY 53 #define SCORE_MIN (-INFINITY) 54 #define SCORE_SCALE 1000 55 56 typedef struct { 57 int idx; ///< used for stable sort 58 listitem_T *item; 59 int score; 60 list_T *lmatchpos; 61 char *pat; 62 char *itemstr; 63 bool itemstr_allocated; 64 int startpos; 65 } fuzzyItem_T; 66 67 typedef struct match_struct match_struct; 68 69 #include "fuzzy.c.generated.h" 70 71 /// fuzzy_match() 72 /// 73 /// @return true if "pat_arg" matches "str". Also returns the match score in 74 /// "outScore" and the matching character positions in "matches". 75 bool fuzzy_match(char *const str, const char *const pat_arg, const bool matchseq, 76 int *const outScore, uint32_t *const matches, const int maxMatches) 77 FUNC_ATTR_NONNULL_ALL 78 { 79 bool complete = false; 80 int numMatches = 0; 81 82 *outScore = 0; 83 84 char *const save_pat = xstrdup(pat_arg); 85 char *pat = save_pat; 86 char *p = pat; 87 88 // Try matching each word in "pat_arg" in "str" 89 while (true) { 90 if (matchseq) { 91 complete = true; 92 } else { 93 // Extract one word from the pattern (separated by space) 94 p = skipwhite(p); 95 if (*p == NUL) { 96 break; 97 } 98 pat = p; 99 while (*p != NUL && !ascii_iswhite(utf_ptr2char(p))) { 100 MB_PTR_ADV(p); 101 } 102 if (*p == NUL) { // processed all the words 103 complete = true; 104 } 105 *p = NUL; 106 } 107 108 int score = FUZZY_SCORE_NONE; 109 if (has_match(pat, str)) { 110 score_t fzy_score = match_positions(pat, str, matches + numMatches); 111 score = (fzy_score == (score_t)SCORE_MIN 112 ? INT_MIN + 1 113 : (fzy_score == (score_t)SCORE_MAX 114 ? INT_MAX 115 : (fzy_score < 0 116 ? (int)ceil(fzy_score * SCORE_SCALE - 0.5) 117 : (int)floor(fzy_score * SCORE_SCALE + 0.5)))); 118 } 119 120 if (score == FUZZY_SCORE_NONE) { 121 numMatches = 0; 122 *outScore = FUZZY_SCORE_NONE; 123 break; 124 } 125 126 if (score > 0 && *outScore > INT_MAX - score) { 127 *outScore = INT_MAX; 128 } else if (score < 0 && *outScore < INT_MIN + 1 - score) { 129 *outScore = INT_MIN + 1; 130 } else { 131 *outScore += score; 132 } 133 134 numMatches += mb_charlen(pat); 135 136 if (complete || numMatches >= maxMatches) { 137 break; 138 } 139 140 // try matching the next word 141 p++; 142 } 143 144 xfree(save_pat); 145 return numMatches != 0; 146 } 147 148 /// Sort the fuzzy matches in the descending order of the match score. 149 /// For items with same score, retain the order using the index (stable sort) 150 static int fuzzy_match_item_compare(const void *const s1, const void *const s2) 151 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE 152 { 153 const int v1 = ((const fuzzyItem_T *)s1)->score; 154 const int v2 = ((const fuzzyItem_T *)s2)->score; 155 156 if (v1 == v2) { 157 const char *const pat = ((const fuzzyItem_T *)s1)->pat; 158 const size_t patlen = strlen(pat); 159 int startpos = ((const fuzzyItem_T *)s1)->startpos; 160 const bool exact_match1 = startpos >= 0 161 && strncmp(pat, ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0; 162 startpos = ((const fuzzyItem_T *)s2)->startpos; 163 const bool exact_match2 = startpos >= 0 164 && strncmp(pat, ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0; 165 166 if (exact_match1 == exact_match2) { 167 const int idx1 = ((const fuzzyItem_T *)s1)->idx; 168 const int idx2 = ((const fuzzyItem_T *)s2)->idx; 169 return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; 170 } else if (exact_match2) { 171 return 1; 172 } 173 return -1; 174 } else { 175 return v1 > v2 ? -1 : 1; 176 } 177 } 178 179 /// Fuzzy search the string "str" in a list of "items" and return the matching 180 /// strings in "fmatchlist". 181 /// If "matchseq" is true, then for multi-word search strings, match all the 182 /// words in sequence. 183 /// If "items" is a list of strings, then search for "str" in the list. 184 /// If "items" is a list of dicts, then either use "key" to lookup the string 185 /// for each item or use "item_cb" Funcref function to get the string. 186 /// If "retmatchpos" is true, then return a list of positions where "str" 187 /// matches for each item. 188 static void fuzzy_match_in_list(list_T *const l, char *const str, const bool matchseq, 189 const char *const key, Callback *const item_cb, 190 const bool retmatchpos, list_T *const fmatchlist, 191 const int max_matches) 192 FUNC_ATTR_NONNULL_ARG(2, 5, 7) 193 { 194 int len = tv_list_len(l); 195 if (len == 0) { 196 return; 197 } 198 if (max_matches > 0 && len > max_matches) { 199 len = max_matches; 200 } 201 202 fuzzyItem_T *const items = xcalloc((size_t)len, sizeof(fuzzyItem_T)); 203 int match_count = 0; 204 uint32_t matches[FUZZY_MATCH_MAX_LEN]; 205 206 // For all the string items in items, get the fuzzy matching score 207 TV_LIST_ITER(l, li, { 208 if (max_matches > 0 && match_count >= max_matches) { 209 break; 210 } 211 212 char *itemstr = NULL; 213 bool itemstr_allocate = false; 214 typval_T rettv; 215 216 rettv.v_type = VAR_UNKNOWN; 217 const typval_T *const tv = TV_LIST_ITEM_TV(li); 218 if (tv->v_type == VAR_STRING) { // list of strings 219 itemstr = tv->vval.v_string; 220 } else if (tv->v_type == VAR_DICT 221 && (key != NULL || item_cb->type != kCallbackNone)) { 222 // For a dict, either use the specified key to lookup the string or 223 // use the specified callback function to get the string. 224 if (key != NULL) { 225 itemstr = tv_dict_get_string(tv->vval.v_dict, key, false); 226 } else { 227 typval_T argv[2]; 228 229 // Invoke the supplied callback (if any) to get the dict item 230 tv->vval.v_dict->dv_refcount++; 231 argv[0].v_type = VAR_DICT; 232 argv[0].vval.v_dict = tv->vval.v_dict; 233 argv[1].v_type = VAR_UNKNOWN; 234 if (callback_call(item_cb, 1, argv, &rettv)) { 235 if (rettv.v_type == VAR_STRING) { 236 itemstr = rettv.vval.v_string; 237 itemstr_allocate = true; 238 } 239 } 240 tv_dict_unref(tv->vval.v_dict); 241 } 242 } 243 244 int score; 245 if (itemstr != NULL 246 && fuzzy_match(itemstr, str, matchseq, &score, matches, FUZZY_MATCH_MAX_LEN)) { 247 char *itemstr_copy = itemstr_allocate ? xstrdup(itemstr) : itemstr; 248 list_T *match_positions = NULL; 249 250 // Copy the list of matching positions in itemstr to a list, if 251 // "retmatchpos" is set. 252 if (retmatchpos) { 253 match_positions = tv_list_alloc(kListLenMayKnow); 254 // Fill position information 255 int j = 0; 256 const char *p = str; 257 while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) { 258 if (!ascii_iswhite(utf_ptr2char(p)) || matchseq) { 259 tv_list_append_number(match_positions, matches[j]); 260 j++; 261 } 262 MB_PTR_ADV(p); 263 } 264 } 265 items[match_count].idx = match_count; 266 items[match_count].item = li; 267 items[match_count].score = score; 268 items[match_count].pat = str; 269 items[match_count].startpos = (int)matches[0]; 270 items[match_count].itemstr = itemstr_copy; 271 items[match_count].itemstr_allocated = itemstr_allocate; 272 items[match_count].lmatchpos = match_positions; 273 274 match_count++; 275 } 276 tv_clear(&rettv); 277 }); 278 279 if (match_count > 0) { 280 // Sort the list by the descending order of the match score 281 qsort(items, (size_t)match_count, sizeof(fuzzyItem_T), fuzzy_match_item_compare); 282 283 // For matchfuzzy(), return a list of matched strings. 284 // ['str1', 'str2', 'str3'] 285 // For matchfuzzypos(), return a list with three items. 286 // The first item is a list of matched strings. The second item 287 // is a list of lists where each list item is a list of matched 288 // character positions. The third item is a list of matching scores. 289 // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] 290 list_T *retlist; 291 if (retmatchpos) { 292 const listitem_T *const li = tv_list_find(fmatchlist, 0); 293 assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); 294 retlist = TV_LIST_ITEM_TV(li)->vval.v_list; 295 } else { 296 retlist = fmatchlist; 297 } 298 299 // Copy the matching strings to the return list 300 for (int i = 0; i < match_count; i++) { 301 tv_list_append_tv(retlist, TV_LIST_ITEM_TV(items[i].item)); 302 } 303 304 // next copy the list of matching positions 305 if (retmatchpos) { 306 const listitem_T *li = tv_list_find(fmatchlist, -2); 307 assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); 308 retlist = TV_LIST_ITEM_TV(li)->vval.v_list; 309 310 for (int i = 0; i < match_count; i++) { 311 assert(items[i].lmatchpos != NULL); 312 tv_list_append_list(retlist, items[i].lmatchpos); 313 items[i].lmatchpos = NULL; 314 } 315 316 // copy the matching scores 317 li = tv_list_find(fmatchlist, -1); 318 assert(li != NULL && TV_LIST_ITEM_TV(li)->vval.v_list != NULL); 319 retlist = TV_LIST_ITEM_TV(li)->vval.v_list; 320 for (int i = 0; i < match_count; i++) { 321 tv_list_append_number(retlist, items[i].score); 322 } 323 } 324 } 325 326 for (int i = 0; i < match_count; i++) { 327 if (items[i].itemstr_allocated) { 328 xfree(items[i].itemstr); 329 } 330 assert(items[i].lmatchpos == NULL); 331 } 332 xfree(items); 333 } 334 335 /// Do fuzzy matching. Returns the list of matched strings in "rettv". 336 /// If "retmatchpos" is true, also returns the matching character positions. 337 static void do_fuzzymatch(const typval_T *const argvars, typval_T *const rettv, 338 const bool retmatchpos) 339 FUNC_ATTR_NONNULL_ALL 340 { 341 // validate and get the arguments 342 if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) { 343 semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); 344 return; 345 } 346 if (argvars[1].v_type != VAR_STRING || argvars[1].vval.v_string == NULL) { 347 semsg(_(e_invarg2), tv_get_string(&argvars[1])); 348 return; 349 } 350 351 Callback cb = CALLBACK_NONE; 352 const char *key = NULL; 353 bool matchseq = false; 354 int max_matches = 0; 355 if (argvars[2].v_type != VAR_UNKNOWN) { 356 if (tv_check_for_nonnull_dict_arg(argvars, 2) == FAIL) { 357 return; 358 } 359 360 // To search a dict, either a callback function or a key can be 361 // specified. 362 dict_T *const d = argvars[2].vval.v_dict; 363 const dictitem_T *di; 364 if ((di = tv_dict_find(d, "key", -1)) != NULL) { 365 if (di->di_tv.v_type != VAR_STRING || di->di_tv.vval.v_string == NULL 366 || *di->di_tv.vval.v_string == NUL) { 367 semsg(_(e_invargNval), "key", tv_get_string(&di->di_tv)); 368 return; 369 } 370 key = tv_get_string(&di->di_tv); 371 } else if (!tv_dict_get_callback(d, "text_cb", -1, &cb)) { 372 semsg(_(e_invargval), "text_cb"); 373 return; 374 } 375 376 if ((di = tv_dict_find(d, "limit", -1)) != NULL) { 377 if (di->di_tv.v_type != VAR_NUMBER) { 378 semsg(_(e_invargval), "limit"); 379 return; 380 } 381 max_matches = (int)tv_get_number_chk(&di->di_tv, NULL); 382 } 383 384 if (tv_dict_has_key(d, "matchseq")) { 385 matchseq = true; 386 } 387 } 388 389 // get the fuzzy matches 390 tv_list_alloc_ret(rettv, retmatchpos ? 3 : kListLenUnknown); 391 if (retmatchpos) { 392 // For matchfuzzypos(), a list with three items are returned. First 393 // item is a list of matching strings, the second item is a list of 394 // lists with matching positions within each string and the third item 395 // is the list of scores of the matches. 396 tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); 397 tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); 398 tv_list_append_list(rettv->vval.v_list, tv_list_alloc(kListLenUnknown)); 399 } 400 401 fuzzy_match_in_list(argvars[0].vval.v_list, (char *)tv_get_string(&argvars[1]), 402 matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches); 403 404 callback_free(&cb); 405 } 406 407 /// "matchfuzzy()" function 408 void f_matchfuzzy(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 409 { 410 do_fuzzymatch(argvars, rettv, false); 411 } 412 413 /// "matchfuzzypos()" function 414 void f_matchfuzzypos(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 415 { 416 do_fuzzymatch(argvars, rettv, true); 417 } 418 419 /// Same as fuzzy_match_item_compare() except for use with a string match 420 static int fuzzy_match_str_compare(const void *const s1, const void *const s2) 421 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 422 { 423 const int v1 = ((fuzmatch_str_T *)s1)->score; 424 const int v2 = ((fuzmatch_str_T *)s2)->score; 425 const int idx1 = ((fuzmatch_str_T *)s1)->idx; 426 const int idx2 = ((fuzmatch_str_T *)s2)->idx; 427 428 if (v1 == v2) { 429 return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; 430 } else { 431 return v1 > v2 ? -1 : 1; 432 } 433 } 434 435 /// Sort fuzzy matches by score 436 static void fuzzy_match_str_sort(fuzmatch_str_T *const fm, const int sz) 437 FUNC_ATTR_NONNULL_ALL 438 { 439 // Sort the list by the descending order of the match score 440 qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_str_compare); 441 } 442 443 /// Same as fuzzy_match_item_compare() except for use with a function name 444 /// string match. <SNR> functions should be sorted to the end. 445 static int fuzzy_match_func_compare(const void *const s1, const void *const s2) 446 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 447 { 448 const int v1 = ((fuzmatch_str_T *)s1)->score; 449 const int v2 = ((fuzmatch_str_T *)s2)->score; 450 const int idx1 = ((fuzmatch_str_T *)s1)->idx; 451 const int idx2 = ((fuzmatch_str_T *)s2)->idx; 452 const char *const str1 = ((fuzmatch_str_T *)s1)->str; 453 const char *const str2 = ((fuzmatch_str_T *)s2)->str; 454 455 if (*str1 != '<' && *str2 == '<') { 456 return -1; 457 } 458 if (*str1 == '<' && *str2 != '<') { 459 return 1; 460 } 461 if (v1 == v2) { 462 return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; 463 } 464 return v1 > v2 ? -1 : 1; 465 } 466 467 /// Sort fuzzy matches of function names by score. 468 /// <SNR> functions should be sorted to the end. 469 static void fuzzy_match_func_sort(fuzmatch_str_T *const fm, const int sz) 470 FUNC_ATTR_NONNULL_ALL 471 { 472 // Sort the list by the descending order of the match score 473 qsort(fm, (size_t)sz, sizeof(fuzmatch_str_T), fuzzy_match_func_compare); 474 } 475 476 /// Fuzzy match "pat" in "str". 477 /// @returns 0 if there is no match. Otherwise, returns the match score. 478 int fuzzy_match_str(char *const str, const char *const pat) 479 FUNC_ATTR_WARN_UNUSED_RESULT 480 { 481 if (str == NULL || pat == NULL) { 482 return 0; 483 } 484 485 int score = FUZZY_SCORE_NONE; 486 uint32_t matchpos[FUZZY_MATCH_MAX_LEN]; 487 fuzzy_match(str, pat, true, &score, matchpos, ARRAY_SIZE(matchpos)); 488 489 return score; 490 } 491 492 /// Fuzzy match the position of string "pat" in string "str". 493 /// @returns a dynamic array of matching positions. If there is no match, returns NULL. 494 garray_T *fuzzy_match_str_with_pos(char *const str, const char *const pat) 495 { 496 if (str == NULL || pat == NULL) { 497 return NULL; 498 } 499 500 garray_T *match_positions = xmalloc(sizeof(garray_T)); 501 ga_init(match_positions, sizeof(uint32_t), 10); 502 503 int score = FUZZY_SCORE_NONE; 504 uint32_t matches[FUZZY_MATCH_MAX_LEN]; 505 if (!fuzzy_match(str, pat, false, &score, matches, FUZZY_MATCH_MAX_LEN) 506 || score == FUZZY_SCORE_NONE) { 507 ga_clear(match_positions); 508 xfree(match_positions); 509 return NULL; 510 } 511 512 int j = 0; 513 for (const char *p = pat; *p != NUL; MB_PTR_ADV(p)) { 514 if (!ascii_iswhite(utf_ptr2char(p))) { 515 GA_APPEND(uint32_t, match_positions, matches[j]); 516 j++; 517 } 518 } 519 520 return match_positions; 521 } 522 523 /// This function splits the line pointed to by `*ptr` into words and performs 524 /// a fuzzy match for the pattern `pat` on each word. It iterates through the 525 /// line, moving `*ptr` to the start of each word during the process. 526 /// 527 /// If a match is found: 528 /// - `*ptr` points to the start of the matched word. 529 /// - `*len` is set to the length of the matched word. 530 /// - `*score` contains the match score. 531 /// 532 /// If no match is found, `*ptr` is updated to the end of the line. 533 bool fuzzy_match_str_in_line(char **ptr, char *pat, int *len, pos_T *current_pos, int *score) 534 { 535 char *str = *ptr; 536 char *strBegin = str; 537 char *end = NULL; 538 char *start = NULL; 539 bool found = false; 540 541 if (str == NULL || pat == NULL) { 542 return found; 543 } 544 char *line_end = find_line_end(str); 545 546 while (str < line_end) { 547 // Skip non-word characters 548 start = find_word_start(str); 549 if (*start == NUL) { 550 break; 551 } 552 end = find_word_end(start); 553 554 // Extract the word from start to end 555 char save_end = *end; 556 *end = NUL; 557 558 // Perform fuzzy match 559 *score = fuzzy_match_str(start, pat); 560 *end = save_end; 561 562 if (*score != FUZZY_SCORE_NONE) { 563 *len = (int)(end - start); 564 found = true; 565 *ptr = start; 566 if (current_pos) { 567 current_pos->col += (int)(end - strBegin); 568 } 569 break; 570 } 571 572 // Move to the end of the current word for the next iteration 573 str = end; 574 // Ensure we continue searching after the current word 575 while (*str != NUL && !vim_iswordp(str)) { 576 MB_PTR_ADV(str); 577 } 578 } 579 580 if (!found) { 581 *ptr = line_end; 582 } 583 584 return found; 585 } 586 587 /// Search for the next fuzzy match in the specified buffer. 588 /// This function attempts to find the next occurrence of the given pattern 589 /// in the buffer, starting from the current position. It handles line wrapping 590 /// and direction of search. 591 /// 592 /// Return true if a match is found, otherwise false. 593 bool search_for_fuzzy_match(buf_T *buf, pos_T *pos, char *pattern, int dir, pos_T *start_pos, 594 int *len, char **ptr, int *score) 595 { 596 pos_T current_pos = *pos; 597 pos_T circly_end; 598 bool found_new_match = false; 599 bool looped_around = false; 600 601 bool whole_line = ctrl_x_mode_whole_line(); 602 603 if (buf == curbuf) { 604 circly_end = *start_pos; 605 } else { 606 circly_end.lnum = buf->b_ml.ml_line_count; 607 circly_end.col = 0; 608 circly_end.coladd = 0; 609 } 610 611 if (whole_line && start_pos->lnum != pos->lnum) { 612 current_pos.lnum += dir; 613 } 614 615 while (true) { 616 // Check if looped around and back to start position 617 if (looped_around && (whole_line ? current_pos.lnum == circly_end.lnum 618 : equalpos(current_pos, circly_end))) { 619 break; 620 } 621 622 // Ensure current_pos is valid 623 if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) { 624 // Get the current line buffer 625 *ptr = ml_get_buf(buf, current_pos.lnum); 626 if (!whole_line) { 627 *ptr += current_pos.col; 628 } 629 630 // If ptr is end of line is reached, move to next line 631 // or previous line based on direction 632 if (*ptr != NULL && **ptr != NUL) { 633 if (!whole_line) { 634 // Try to find a fuzzy match in the current line starting 635 // from current position 636 found_new_match = fuzzy_match_str_in_line(ptr, pattern, 637 len, ¤t_pos, score); 638 if (found_new_match) { 639 *pos = current_pos; 640 break; 641 } else if (looped_around && current_pos.lnum == circly_end.lnum) { 642 break; 643 } 644 } else { 645 if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) { 646 found_new_match = true; 647 *pos = current_pos; 648 *len = ml_get_buf_len(buf, current_pos.lnum); 649 break; 650 } 651 } 652 } 653 } 654 655 // Move to the next line or previous line based on direction 656 if (dir == FORWARD) { 657 if (++current_pos.lnum > buf->b_ml.ml_line_count) { 658 if (p_ws) { 659 current_pos.lnum = 1; 660 looped_around = true; 661 } else { 662 break; 663 } 664 } 665 } else { 666 if (--current_pos.lnum < 1) { 667 if (p_ws) { 668 current_pos.lnum = buf->b_ml.ml_line_count; 669 looped_around = true; 670 } else { 671 break; 672 } 673 } 674 } 675 current_pos.col = 0; 676 } 677 678 return found_new_match; 679 } 680 681 /// Free an array of fuzzy string matches "fuzmatch[count]". 682 void fuzmatch_str_free(fuzmatch_str_T *const fuzmatch, int count) 683 { 684 if (fuzmatch == NULL) { 685 return; 686 } 687 for (int i = 0; i < count; i++) { 688 xfree(fuzmatch[count].str); 689 } 690 xfree(fuzmatch); 691 } 692 693 /// Copy a list of fuzzy matches into a string list after sorting the matches by 694 /// the fuzzy score. Frees the memory allocated for "fuzmatch". 695 void fuzzymatches_to_strmatches(fuzmatch_str_T *const fuzmatch, char ***const matches, 696 const int count, const bool funcsort) 697 FUNC_ATTR_NONNULL_ARG(2) 698 { 699 if (count <= 0) { 700 goto theend; 701 } 702 703 *matches = xmalloc((size_t)count * sizeof(char *)); 704 705 // Sort the list by the descending order of the match score 706 if (funcsort) { 707 fuzzy_match_func_sort(fuzmatch, count); 708 } else { 709 fuzzy_match_str_sort(fuzmatch, count); 710 } 711 712 for (int i = 0; i < count; i++) { 713 (*matches)[i] = fuzmatch[i].str; 714 } 715 716 theend: 717 xfree(fuzmatch); 718 } 719 720 /// Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy. 721 /// This implementation extends the original by supporting multibyte characters. 722 723 #define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN 724 725 #define SCORE_GAP_LEADING -0.005 726 #define SCORE_GAP_TRAILING -0.005 727 #define SCORE_GAP_INNER -0.01 728 #define SCORE_MATCH_CONSECUTIVE 1.0 729 #define SCORE_MATCH_SLASH 0.9 730 #define SCORE_MATCH_WORD 0.8 731 #define SCORE_MATCH_CAPITAL 0.7 732 #define SCORE_MATCH_DOT 0.6 733 734 static int has_match(const char *const needle, const char *const haystack) 735 { 736 if (!needle || !haystack || !*needle) { 737 return FAIL; 738 } 739 740 const char *n_ptr = needle; 741 const char *h_ptr = haystack; 742 743 while (*n_ptr) { 744 const int n_char = utf_ptr2char(n_ptr); 745 bool found = false; 746 747 while (*h_ptr) { 748 const int h_char = utf_ptr2char(h_ptr); 749 if (n_char == h_char || mb_toupper(n_char) == h_char) { 750 found = true; 751 h_ptr += utfc_ptr2len(h_ptr); 752 break; 753 } 754 h_ptr += utfc_ptr2len(h_ptr); 755 } 756 757 if (!found) { 758 return FAIL; 759 } 760 761 n_ptr += utfc_ptr2len(n_ptr); 762 } 763 764 return OK; 765 } 766 767 struct match_struct { 768 int needle_len; 769 int haystack_len; 770 int lower_needle[MATCH_MAX_LEN]; ///< stores codepoints 771 int lower_haystack[MATCH_MAX_LEN]; ///< stores codepoints 772 score_t match_bonus[MATCH_MAX_LEN]; 773 }; 774 775 #define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ') 776 #define IS_PATH_SEP(c) ((c) == '/') 777 #define IS_DOT(c) ((c) == '.') 778 779 static score_t compute_bonus_codepoint(int last_c, int c) 780 { 781 if (ASCII_ISALNUM(c) || vim_iswordc(c)) { 782 if (IS_PATH_SEP(last_c)) { 783 return SCORE_MATCH_SLASH; 784 } 785 if (IS_WORD_SEP(last_c)) { 786 return SCORE_MATCH_WORD; 787 } 788 if (IS_DOT(last_c)) { 789 return SCORE_MATCH_DOT; 790 } 791 if (mb_isupper(c) && mb_islower(last_c)) { 792 return SCORE_MATCH_CAPITAL; 793 } 794 } 795 return 0; 796 } 797 798 static void setup_match_struct(match_struct *const match, const char *const needle, 799 const char *const haystack) 800 { 801 int i = 0; 802 const char *p = needle; 803 while (*p != NUL && i < MATCH_MAX_LEN) { 804 const int c = utf_ptr2char(p); 805 match->lower_needle[i++] = mb_tolower(c); 806 MB_PTR_ADV(p); 807 } 808 match->needle_len = i; 809 810 i = 0; 811 p = haystack; 812 int prev_c = '/'; 813 while (*p != NUL && i < MATCH_MAX_LEN) { 814 const int c = utf_ptr2char(p); 815 match->lower_haystack[i] = mb_tolower(c); 816 match->match_bonus[i] = compute_bonus_codepoint(prev_c, c); 817 prev_c = c; 818 MB_PTR_ADV(p); 819 i++; 820 } 821 match->haystack_len = i; 822 } 823 824 static inline void match_row(const match_struct *match, int row, score_t *curr_D, score_t *curr_M, 825 const score_t *last_D, const score_t *last_M) 826 { 827 int n = match->needle_len; 828 int m = match->haystack_len; 829 int i = row; 830 831 const int *lower_needle = match->lower_needle; 832 const int *lower_haystack = match->lower_haystack; 833 const score_t *match_bonus = match->match_bonus; 834 835 score_t prev_score = (score_t)SCORE_MIN; 836 score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; 837 838 // These will not be used with this value, but not all compilers see it 839 score_t prev_M = (score_t)SCORE_MIN, prev_D = (score_t)SCORE_MIN; 840 841 for (int j = 0; j < m; j++) { 842 if (lower_needle[i] == lower_haystack[j]) { 843 score_t score = (score_t)SCORE_MIN; 844 if (!i) { 845 score = (j * SCORE_GAP_LEADING) + match_bonus[j]; 846 } else if (j) { // i > 0 && j > 0 847 score = MAX(prev_M + match_bonus[j], 848 // consecutive match, doesn't stack with match_bonus 849 prev_D + SCORE_MATCH_CONSECUTIVE); 850 } 851 prev_D = last_D[j]; 852 prev_M = last_M[j]; 853 curr_D[j] = score; 854 curr_M[j] = prev_score = MAX(score, prev_score + gap_score); 855 } else { 856 prev_D = last_D[j]; 857 prev_M = last_M[j]; 858 curr_D[j] = (score_t)SCORE_MIN; 859 curr_M[j] = prev_score = prev_score + gap_score; 860 } 861 } 862 } 863 864 static score_t match_positions(const char *const needle, const char *const haystack, 865 uint32_t *const positions) 866 { 867 if (!needle || !haystack || !*needle) { 868 return (score_t)SCORE_MIN; 869 } 870 871 match_struct match; 872 setup_match_struct(&match, needle, haystack); 873 874 int n = match.needle_len; 875 int m = match.haystack_len; 876 877 if (m > MATCH_MAX_LEN || n > m) { 878 // Unreasonably large candidate: return no score 879 // If it is a valid match it will still be returned, it will 880 // just be ranked below any reasonably sized candidates 881 return (score_t)SCORE_MIN; 882 } else if (n == m) { 883 // Since this method can only be called with a haystack which 884 // matches needle. If the lengths of the strings are equal the 885 // strings themselves must also be equal (ignoring case). 886 if (positions) { 887 for (int i = 0; i < n; i++) { 888 positions[i] = (uint32_t)i; 889 } 890 } 891 return (score_t)SCORE_MAX; 892 } 893 894 // ensure n * MATCH_MAX_LEN * 2 won't overflow 895 if ((size_t)n > (SIZE_MAX / sizeof(score_t)) / MATCH_MAX_LEN / 2) { 896 return (score_t)SCORE_MIN; 897 } 898 899 // Allocate for both D and M matrices in one contiguous block 900 score_t *block = (score_t *)xmalloc(sizeof(score_t) * MATCH_MAX_LEN * (size_t)n * 2); 901 902 // D[][] Stores the best score for this position ending with a match. 903 // M[][] Stores the best possible score at this position. 904 score_t(*D)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block); 905 score_t(*M)[MATCH_MAX_LEN] = (score_t(*)[MATCH_MAX_LEN])(block 906 + MATCH_MAX_LEN * (size_t)n); 907 908 match_row(&match, 0, D[0], M[0], D[0], M[0]); 909 for (int i = 1; i < n; i++) { 910 match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]); 911 } 912 913 // backtrace to find the positions of optimal matching 914 if (positions) { 915 int match_required = 0; 916 for (int i = n - 1, j = m - 1; i >= 0; i--) { 917 for (; j >= 0; j--) { 918 // There may be multiple paths which result in 919 // the optimal weight. 920 // 921 // For simplicity, we will pick the first one 922 // we encounter, the latest in the candidate 923 // string. 924 if (D[i][j] != (score_t)SCORE_MIN 925 && (match_required || D[i][j] == M[i][j])) { 926 // If this score was determined using 927 // SCORE_MATCH_CONSECUTIVE, the 928 // previous character MUST be a match 929 match_required = i && j 930 && M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; 931 positions[i] = (uint32_t)(j--); 932 break; 933 } 934 } 935 } 936 } 937 938 score_t result = M[n - 1][m - 1]; 939 940 xfree(block); 941 return result; 942 }