strings.c (91265B)
1 #include <assert.h> 2 #include <inttypes.h> 3 #include <math.h> 4 #include <stdarg.h> 5 #include <stdbool.h> 6 #include <stddef.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 11 #include "auto/config.h" 12 #include "nvim/api/private/defs.h" 13 #include "nvim/api/private/helpers.h" 14 #include "nvim/ascii_defs.h" 15 #include "nvim/assert_defs.h" 16 #include "nvim/charset.h" 17 #include "nvim/errors.h" 18 #include "nvim/eval/encode.h" 19 #include "nvim/eval/typval.h" 20 #include "nvim/eval/typval_defs.h" 21 #include "nvim/ex_docmd.h" 22 #include "nvim/garray.h" 23 #include "nvim/garray_defs.h" 24 #include "nvim/gettext_defs.h" 25 #include "nvim/macros_defs.h" 26 #include "nvim/math.h" 27 #include "nvim/mbyte.h" 28 #include "nvim/mbyte_defs.h" 29 #include "nvim/memory.h" 30 #include "nvim/memory_defs.h" 31 #include "nvim/message.h" 32 #include "nvim/option.h" 33 #include "nvim/plines.h" 34 #include "nvim/strings.h" 35 #include "nvim/types_defs.h" 36 #include "nvim/vim_defs.h" 37 38 #include "strings.c.generated.h" 39 40 static const char e_cannot_mix_positional_and_non_positional_str[] 41 = N_("E1500: Cannot mix positional and non-positional arguments: %s"); 42 static const char e_fmt_arg_nr_unused_str[] 43 = N_("E1501: format argument %d unused in $-style format: %s"); 44 static const char e_positional_num_field_spec_reused_str_str[] 45 = N_("E1502: Positional argument %d used as field width reused as different type: %s/%s"); 46 static const char e_positional_nr_out_of_bounds_str[] 47 = N_("E1503: Positional argument %d out of bounds: %s"); 48 static const char e_positional_arg_num_type_inconsistent_str_str[] 49 = N_("E1504: Positional argument %d type used inconsistently: %s/%s"); 50 static const char e_invalid_format_specifier_str[] 51 = N_("E1505: Invalid format specifier: %s"); 52 static const char e_aptypes_is_null_nr_str[] 53 = "E1507: Internal error: ap_types or ap_types[idx] is NULL: %d: %s"; 54 55 static const char 
typename_unknown[] = N_("unknown"); 56 static const char typename_int[] = N_("int"); 57 static const char typename_longint[] = N_("long int"); 58 static const char typename_longlongint[] = N_("long long int"); 59 static const char typename_signedsizet[] = N_("signed size_t"); 60 static const char typename_unsignedint[] = N_("unsigned int"); 61 static const char typename_unsignedlongint[] = N_("unsigned long int"); 62 static const char typename_unsignedlonglongint[] = N_("unsigned long long int"); 63 static const char typename_sizet[] = N_("size_t"); 64 static const char typename_pointer[] = N_("pointer"); 65 static const char typename_percent[] = N_("percent"); 66 static const char typename_char[] = N_("char"); 67 static const char typename_string[] = N_("string"); 68 static const char typename_float[] = N_("float"); 69 70 /// Copy up to `len` bytes of `string` into newly allocated memory and 71 /// terminate with a NUL. The allocated memory always has size `len + 1`, even 72 /// when `string` is shorter. 73 char *xstrnsave(const char *string, size_t len) 74 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 75 { 76 return strncpy(xmallocz(len), string, len); // NOLINT(runtime/printf) 77 } 78 79 // Same as vim_strsave(), but any characters found in esc_chars are preceded 80 // by a backslash. 81 char *vim_strsave_escaped(const char *string, const char *esc_chars) 82 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 83 { 84 return vim_strsave_escaped_ext(string, esc_chars, '\\', false); 85 } 86 87 // Same as vim_strsave_escaped(), but when "bsl" is true also escape 88 // characters where rem_backslash() would remove the backslash. 89 // Escape the characters with "cc". 90 char *vim_strsave_escaped_ext(const char *string, const char *esc_chars, char cc, bool bsl) 91 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 92 { 93 // First count the number of backslashes required. 94 // Then allocate the memory and insert them. 
95 size_t length = 1; // count the trailing NUL 96 for (const char *p = string; *p; p++) { 97 const size_t l = (size_t)(utfc_ptr2len(p)); 98 if (l > 1) { 99 length += l; // count a multibyte char 100 p += l - 1; 101 continue; 102 } 103 if (vim_strchr(esc_chars, (uint8_t)(*p)) != NULL || (bsl && rem_backslash(p))) { 104 length++; // count a backslash 105 } 106 length++; // count an ordinary char 107 } 108 109 char *escaped_string = xmalloc(length); 110 char *p2 = escaped_string; 111 for (const char *p = string; *p; p++) { 112 const size_t l = (size_t)(utfc_ptr2len(p)); 113 if (l > 1) { 114 memcpy(p2, p, l); 115 p2 += l; 116 p += l - 1; // skip multibyte char 117 continue; 118 } 119 if (vim_strchr(esc_chars, (uint8_t)(*p)) != NULL || (bsl && rem_backslash(p))) { 120 *p2++ = cc; 121 } 122 *p2++ = *p; 123 } 124 *p2 = NUL; 125 126 return escaped_string; 127 } 128 129 /// Save a copy of an unquoted string 130 /// 131 /// Turns string like `a\bc"def\"ghi\\\n"jkl` into `a\bcdef"ghi\\njkl`, for use 132 /// in shell_build_argv: the only purpose of backslash is making next character 133 /// be treated literally inside the double quotes, if this character is 134 /// backslash or quote. 135 /// 136 /// @param[in] string String to copy. 137 /// @param[in] length Length of the string to copy. 138 /// 139 /// @return [allocated] Copy of the string. 
140 char *vim_strnsave_unquoted(const char *const string, const size_t length) 141 FUNC_ATTR_MALLOC FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 142 FUNC_ATTR_NONNULL_RET 143 { 144 #define ESCAPE_COND(p, inquote, string_end) \ 145 (*(p) == '\\' && (inquote) && (p) + 1 < (string_end) && ((p)[1] == '\\' || (p)[1] == '"')) 146 size_t ret_length = 0; 147 bool inquote = false; 148 const char *const string_end = string + length; 149 for (const char *p = string; p < string_end; p++) { 150 if (*p == '"') { 151 inquote = !inquote; 152 } else if (ESCAPE_COND(p, inquote, string_end)) { 153 ret_length++; 154 p++; 155 } else { 156 ret_length++; 157 } 158 } 159 160 char *const ret = xmallocz(ret_length); 161 char *rp = ret; 162 inquote = false; 163 for (const char *p = string; p < string_end; p++) { 164 if (*p == '"') { 165 inquote = !inquote; 166 } else if (ESCAPE_COND(p, inquote, string_end)) { 167 *rp++ = *(++p); 168 } else { 169 *rp++ = *p; 170 } 171 } 172 #undef ESCAPE_COND 173 174 return ret; 175 } 176 177 /// Escape "string" for use as a shell argument with system(). 178 /// This uses single quotes, except when we know we need to use double quotes 179 /// (MS-Windows without 'shellslash' set). 180 /// Escape a newline, depending on the 'shell' option. 181 /// When "do_special" is true also replace "!", "%", "#" and things starting 182 /// with "<" like "<cfile>". 183 /// When "do_newline" is false do not escape newline unless it is csh shell. 184 /// 185 /// @return the result in allocated memory. 186 char *vim_strsave_shellescape(const char *string, bool do_special, bool do_newline) 187 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 188 { 189 size_t l; 190 191 // Only csh and similar shells expand '!' within single quotes. For sh and 192 // the like we must not put a backslash before it, it will be taken 193 // literally. If do_special is set the '!' will be escaped twice. 194 // Csh also needs to have "\n" escaped twice when do_special is set. 
195 int csh_like = csh_like_shell(); 196 197 // Fish shell uses '\' as an escape character within single quotes, so '\' 198 // itself must be escaped to get a literal '\'. 199 bool fish_like = fish_like_shell(); 200 201 // First count the number of extra bytes required. 202 size_t length = strlen(string) + 3; // two quotes and a trailing NUL 203 for (const char *p = string; *p != NUL; MB_PTR_ADV(p)) { 204 #ifdef MSWIN 205 if (!p_ssl) { 206 if (*p == '"') { 207 length++; // " -> "" 208 } 209 } else 210 #endif 211 if (*p == '\'') { 212 length += 3; // ' => '\'' 213 } 214 if ((*p == '\n' && (csh_like || do_newline)) 215 || (*p == '!' && (csh_like || do_special))) { 216 length++; // insert backslash 217 if (csh_like && do_special) { 218 length++; // insert backslash 219 } 220 } 221 if (do_special && find_cmdline_var(p, &l) >= 0) { 222 length++; // insert backslash 223 p += l - 1; 224 } 225 if (*p == '\\' && fish_like) { 226 length++; // insert backslash 227 } 228 } 229 230 // Allocate memory for the result and fill it. 231 char *escaped_string = xmalloc(length); 232 char *d = escaped_string; 233 234 // add opening quote 235 #ifdef MSWIN 236 if (!p_ssl) { 237 *d++ = '"'; 238 } else 239 #endif 240 *d++ = '\''; 241 242 for (const char *p = string; *p != NUL;) { 243 #ifdef MSWIN 244 if (!p_ssl) { 245 if (*p == '"') { 246 *d++ = '"'; 247 *d++ = '"'; 248 p++; 249 continue; 250 } 251 } else 252 #endif 253 if (*p == '\'') { 254 *d++ = '\''; 255 *d++ = '\\'; 256 *d++ = '\''; 257 *d++ = '\''; 258 p++; 259 continue; 260 } 261 if ((*p == '\n' && (csh_like || do_newline)) 262 || (*p == '!' 
&& (csh_like || do_special))) { 263 *d++ = '\\'; 264 if (csh_like && do_special) { 265 *d++ = '\\'; 266 } 267 *d++ = *p++; 268 continue; 269 } 270 if (do_special && find_cmdline_var(p, &l) >= 0) { 271 *d++ = '\\'; // insert backslash 272 memcpy(d, p, l); // copy the var 273 d += l; 274 p += l; 275 continue; 276 } 277 if (*p == '\\' && fish_like) { 278 *d++ = '\\'; 279 *d++ = *p++; 280 continue; 281 } 282 283 mb_copy_char(&p, &d); 284 } 285 286 // add terminating quote and finish with a NUL 287 #ifdef MSWIN 288 if (!p_ssl) { 289 *d++ = '"'; 290 } else 291 #endif 292 *d++ = '\''; 293 *d = NUL; 294 295 return escaped_string; 296 } 297 298 // Like vim_strsave(), but make all characters uppercase. 299 // This uses ASCII lower-to-upper case translation, language independent. 300 char *vim_strsave_up(const char *string) 301 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 302 { 303 char *p1 = xmalloc(strlen(string) + 1); 304 vim_strcpy_up(p1, string); 305 return p1; 306 } 307 308 /// Like xstrnsave(), but make all characters uppercase. 309 /// This uses ASCII lower-to-upper case translation, language independent. 310 char *vim_strnsave_up(const char *string, size_t len) 311 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 312 { 313 char *p1 = xmalloc(len + 1); 314 vim_strncpy_up(p1, string, len); 315 return p1; 316 } 317 318 // ASCII lower-to-upper case translation, language independent. 319 void vim_strup(char *p) 320 FUNC_ATTR_NONNULL_ALL 321 { 322 uint8_t c; 323 while ((c = (uint8_t)(*p)) != NUL) { 324 *p++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20); 325 } 326 } 327 328 // strcpy plus vim_strup. 329 void vim_strcpy_up(char *restrict dst, const char *restrict src) 330 FUNC_ATTR_NONNULL_ALL 331 { 332 uint8_t c; 333 while ((c = (uint8_t)(*src++)) != NUL) { 334 *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20); 335 } 336 *dst = NUL; 337 } 338 339 // strncpy (NUL-terminated) plus vim_strup. 
340 void vim_strncpy_up(char *restrict dst, const char *restrict src, size_t n) 341 FUNC_ATTR_NONNULL_ALL 342 { 343 uint8_t c; 344 while (n-- && (c = (uint8_t)(*src++)) != NUL) { 345 *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20); 346 } 347 *dst = NUL; 348 } 349 350 // memcpy (does not NUL-terminate) plus vim_strup. 351 void vim_memcpy_up(char *restrict dst, const char *restrict src, size_t n) 352 FUNC_ATTR_NONNULL_ALL 353 { 354 uint8_t c; 355 while (n--) { 356 c = (uint8_t)(*src++); 357 *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20); 358 } 359 } 360 361 /// Make given string all upper-case or all lower-case 362 /// 363 /// Handles multi-byte characters as good as possible. 364 /// 365 /// @param[in] orig Input string. 366 /// @param[in] upper If true make uppercase, otherwise lowercase 367 /// 368 /// @return [allocated] upper-cased string. 369 char *strcase_save(const char *const orig, bool upper) 370 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 371 { 372 // Calculate the initial length and allocate memory for the result 373 size_t orig_len = strlen(orig); 374 // +1 for the null terminator 375 char *res = xmalloc(orig_len + 1); 376 // Index in the result string 377 size_t res_index = 0; 378 // Current position in the original string 379 const char *p = orig; 380 381 while (*p != NUL) { 382 CharInfo char_info = utf_ptr2CharInfo(p); 383 int c = char_info.value < 0 ? (uint8_t)(*p) : char_info.value; 384 int newc = upper ? 
mb_toupper(c) : mb_tolower(c); 385 // Cast to size_t to avoid mixing types in arithmetic 386 size_t newl = (size_t)utf_char2len(newc); 387 388 // Check if there's enough space in the allocated memory 389 if (res_index + newl > orig_len) { 390 // Need more space: allocate extra space for the new character and the null terminator 391 size_t new_size = res_index + newl + 1; 392 res = xrealloc(res, new_size); 393 // Adjust the original length to the new size, minus the null terminator 394 orig_len = new_size - 1; 395 } 396 397 // Write the possibly new character into the result string 398 utf_char2bytes(newc, res + res_index); 399 // Move the index in the result string 400 res_index += newl; 401 // Move to the next character in the original string 402 p += char_info.len; 403 } 404 405 // Null-terminate the result string 406 res[res_index] = NUL; 407 return res; 408 } 409 410 // delete spaces at the end of a string 411 void del_trailing_spaces(char *ptr) 412 FUNC_ATTR_NONNULL_ALL 413 { 414 char *q = ptr + strlen(ptr); 415 while (--q > ptr && ascii_iswhite(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) { 416 *q = NUL; 417 } 418 } 419 420 #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) 421 // Compare two strings, ignoring case, using current locale. 422 // Doesn't work for multi-byte characters. 423 // return 0 for match, < 0 for smaller, > 0 for bigger 424 int vim_stricmp(const char *s1, const char *s2) 425 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 426 { 427 int i; 428 429 while (true) { 430 i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2)); 431 if (i != 0) { 432 return i; // this character different 433 } 434 if (*s1 == NUL) { 435 break; // strings match until NUL 436 } 437 s1++; 438 s2++; 439 } 440 return 0; // strings match 441 } 442 #endif 443 444 #if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) 445 // Compare two strings, for length "len", ignoring case, using current locale. 446 // Doesn't work for multi-byte characters. 
447 // return 0 for match, < 0 for smaller, > 0 for bigger 448 int vim_strnicmp(const char *s1, const char *s2, size_t len) 449 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 450 { 451 int i; 452 453 while (len > 0) { 454 i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2)); 455 if (i != 0) { 456 return i; // this character different 457 } 458 if (*s1 == NUL) { 459 break; // strings match until NUL 460 } 461 s1++; 462 s2++; 463 len--; 464 } 465 return 0; // strings match 466 } 467 #endif 468 469 /// Case-insensitive `strequal`. 470 bool striequal(const char *a, const char *b) 471 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 472 { 473 return (a == NULL && b == NULL) || (a && b && STRICMP(a, b) == 0); 474 } 475 476 /// Compare two ASCII strings, for length "len", ignoring case, ignoring locale. 477 /// 478 /// @return 0 for match, < 0 for smaller, > 0 for bigger 479 int vim_strnicmp_asc(const char *s1, const char *s2, size_t len) 480 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 481 { 482 int i = 0; 483 while (len > 0) { 484 i = TOLOWER_ASC(*s1) - TOLOWER_ASC(*s2); 485 if (i != 0) { 486 break; // this character is different 487 } 488 if (*s1 == NUL) { 489 break; // strings match until NUL 490 } 491 s1++; 492 s2++; 493 len--; 494 } 495 return i; 496 } 497 498 /// strchr() version which handles multibyte strings 499 /// 500 /// @param[in] string String to search in. 501 /// @param[in] c Character to search for. 502 /// 503 /// @return Pointer to the first byte of the found character in string or NULL 504 /// if it was not found or character is invalid. NUL character is never 505 /// found, use `strlen()` instead. 
506 char *vim_strchr(const char *const string, const int c) 507 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 508 { 509 if (c <= 0) { 510 return NULL; 511 } else if (c < 0x80) { 512 return strchr(string, c); 513 } else { 514 char u8char[MB_MAXBYTES + 1]; 515 const int len = utf_char2bytes(c, u8char); 516 u8char[len] = NUL; 517 return strstr(string, u8char); 518 } 519 } 520 521 // Sort an array of strings. 522 523 static int sort_compare(const void *s1, const void *s2) 524 FUNC_ATTR_NONNULL_ALL 525 { 526 return strcmp(*(char **)s1, *(char **)s2); 527 } 528 529 void sort_strings(char **files, int count) 530 { 531 qsort((void *)files, (size_t)count, sizeof(char *), sort_compare); 532 } 533 534 // Return true if string "s" contains a non-ASCII character (128 or higher). 535 // When "s" is NULL false is returned. 536 bool has_non_ascii(const char *s) 537 FUNC_ATTR_PURE 538 { 539 if (s != NULL) { 540 for (const char *p = s; *p != NUL; p++) { 541 if ((uint8_t)(*p) >= 128) { 542 return true; 543 } 544 } 545 } 546 return false; 547 } 548 549 /// Return true if string "s" contains a non-ASCII character (128 or higher). 550 /// When "s" is NULL false is returned. 551 bool has_non_ascii_len(const char *const s, const size_t len) 552 FUNC_ATTR_PURE 553 { 554 if (s != NULL) { 555 for (size_t i = 0; i < len; i++) { 556 if ((uint8_t)s[i] >= 128) { 557 return true; 558 } 559 } 560 } 561 return false; 562 } 563 564 /// Concatenate two strings and return the result in allocated memory. 
565 char *concat_str(const char *restrict str1, const char *restrict str2) 566 FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL 567 { 568 size_t l = strlen(str1); 569 char *dest = xmalloc(l + strlen(str2) + 1); 570 STRCPY(dest, str1); 571 STRCPY(dest + l, str2); 572 return dest; 573 } 574 575 static const char *const e_printf = 576 N_("E766: Insufficient arguments for printf()"); 577 578 /// Get number argument from idxp entry in tvs 579 /// 580 /// Will give an error message for Vimscript entry with invalid type or for insufficient entries. 581 /// 582 /// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value. 583 /// @param[in,out] idxp Index in a list. Will be incremented. Indexing starts at 1. 584 /// 585 /// @return Number value or 0 in case of error. 586 static varnumber_T tv_nr(typval_T *tvs, int *idxp) 587 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 588 { 589 int idx = *idxp - 1; 590 varnumber_T n = 0; 591 592 if (tvs[idx].v_type == VAR_UNKNOWN) { 593 emsg(_(e_printf)); 594 } else { 595 (*idxp)++; 596 bool err = false; 597 n = tv_get_number_chk(&tvs[idx], &err); 598 if (err) { 599 n = 0; 600 } 601 } 602 return n; 603 } 604 605 /// Get string argument from idxp entry in tvs 606 /// 607 /// Will give an error message for Vimscript entry with invalid type or for 608 /// insufficient entries. 609 /// 610 /// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN 611 /// value. 612 /// @param[in,out] idxp Index in a list. Will be incremented. 613 /// @param[out] tofree If the idxp entry in tvs is not a String or a Number, 614 /// it will be converted to String in the same format 615 /// as ":echo" and stored in "*tofree". The caller must 616 /// free "*tofree". 617 /// 618 /// @return String value or NULL in case of error. 
619 static const char *tv_str(typval_T *tvs, int *idxp, char **const tofree) 620 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 621 { 622 int idx = *idxp - 1; 623 const char *s = NULL; 624 625 if (tvs[idx].v_type == VAR_UNKNOWN) { 626 emsg(_(e_printf)); 627 } else { 628 (*idxp)++; 629 if (tvs[idx].v_type == VAR_STRING || tvs[idx].v_type == VAR_NUMBER) { 630 s = tv_get_string_chk(&tvs[idx]); 631 *tofree = NULL; 632 } else { 633 s = *tofree = encode_tv2echo(&tvs[idx], NULL); 634 } 635 } 636 return s; 637 } 638 639 /// Get pointer argument from the next entry in tvs 640 /// 641 /// Will give an error message for Vimscript entry with invalid type or for 642 /// insufficient entries. 643 /// 644 /// @param[in] tvs List of typval_T values. 645 /// @param[in,out] idxp Pointer to the index of the current value. 646 /// 647 /// @return Pointer stored in typval_T or NULL. 648 static const void *tv_ptr(const typval_T *const tvs, int *const idxp) 649 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 650 { 651 #define OFF(attr) offsetof(union typval_vval_union, attr) 652 STATIC_ASSERT(OFF(v_string) == OFF(v_list) 653 && OFF(v_string) == OFF(v_dict) 654 && OFF(v_string) == OFF(v_blob) 655 && OFF(v_string) == OFF(v_partial) 656 && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_list) 657 && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_dict) 658 && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_blob) 659 && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_partial), 660 "Strings, Dictionaries, Lists, Blobs and Partials are expected to be pointers, " 661 "so that all of them can be accessed via v_string"); 662 #undef OFF 663 const int idx = *idxp - 1; 664 if (tvs[idx].v_type == VAR_UNKNOWN) { 665 emsg(_(e_printf)); 666 return NULL; 667 } 668 (*idxp)++; 669 return tvs[idx].vval.v_string; 670 } 671 672 /// Get float argument from idxp entry in tvs 673 /// 674 /// Will give an error message for Vimscript entry with invalid type or for 675 /// insufficient 
entries. 676 /// 677 /// @param[in] tvs List of Vimscript values. List is terminated by VAR_UNKNOWN value. 678 /// @param[in,out] idxp Index in a list. Will be incremented. 679 /// 680 /// @return Floating-point value or zero in case of error. 681 static float_T tv_float(typval_T *const tvs, int *const idxp) 682 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 683 { 684 int idx = *idxp - 1; 685 float_T f = 0; 686 687 if (tvs[idx].v_type == VAR_UNKNOWN) { 688 emsg(_(e_printf)); 689 } else { 690 (*idxp)++; 691 if (tvs[idx].v_type == VAR_FLOAT) { 692 f = tvs[idx].vval.v_float; 693 } else if (tvs[idx].v_type == VAR_NUMBER) { 694 f = (float_T)tvs[idx].vval.v_number; 695 } else { 696 emsg(_("E807: Expected Float argument for printf()")); 697 } 698 } 699 return f; 700 } 701 702 // This code was included to provide a portable vsnprintf() and snprintf(). 703 // Some systems may provide their own, but we always use this one for 704 // consistency. 705 // 706 // This code is based on snprintf.c - a portable implementation of snprintf 707 // by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06. 708 // Included with permission. It was heavily modified to fit in Vim. 709 // The original code, including useful comments, can be found here: 710 // 711 // http://www.ijs.si/software/snprintf/ 712 // 713 // This snprintf() only supports the following conversion specifiers: 714 // s, c, b, B, d, u, o, x, X, p (and synonyms: i, D, U, O - see below) 715 // with flags: '-', '+', ' ', '0' and '#'. 716 // An asterisk is supported for field width as well as precision. 717 // 718 // Limited support for floating point was added: 'f', 'e', 'E', 'g', 'G'. 719 // 720 // Length modifiers 'h' (short int), 'l' (long int) and "ll" (long long int) are 721 // supported. 722 // 723 // The locale is not used, the string is used as a byte string. This is only 724 // relevant for double-byte encodings where the second byte may be '%'. 
725 // 726 // It is permitted for "str_m" to be zero, and it is permitted to specify NULL 727 // pointer for resulting string argument if "str_m" is zero (as per ISO C99). 728 // 729 // The return value is the number of characters which would be generated 730 // for the given input, excluding the trailing NUL. If this value 731 // is greater or equal to "str_m", not all characters from the result 732 // have been stored in str, output bytes beyond the ("str_m"-1) -th character 733 // are discarded. If "str_m" is greater than zero it is guaranteed 734 // the resulting string will be NUL-terminated. 735 736 // vim_vsnprintf_typval() can be invoked with either "va_list" or a list of 737 // "typval_T". When the latter is not used it must be NULL. 738 739 /// Append a formatted value to the string 740 /// 741 /// @see vim_vsnprintf_typval(). 742 int vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...) 743 FUNC_ATTR_PRINTF(3, 4) 744 { 745 const size_t len = strlen(str); 746 size_t space; 747 748 if (str_m <= len) { 749 space = 0; 750 } else { 751 space = str_m - len; 752 } 753 va_list ap; 754 va_start(ap, fmt); 755 const int str_l = vim_vsnprintf(str + len, space, fmt, ap); 756 va_end(ap); 757 return str_l; 758 } 759 760 /// Write formatted value to the string 761 /// 762 /// @param[out] str String to write to. 763 /// @param[in] str_m String length. 764 /// @param[in] fmt String format. 765 /// 766 /// @return Number of bytes excluding NUL byte that would be written to the 767 /// string if str_m was greater or equal to the return value. 768 int vim_snprintf(char *str, size_t str_m, const char *fmt, ...) 769 FUNC_ATTR_PRINTF(3, 4) 770 { 771 va_list ap; 772 va_start(ap, fmt); 773 const int str_l = vim_vsnprintf(str, str_m, fmt, ap); 774 va_end(ap); 775 return str_l; 776 } 777 778 // Return the representation of infinity for printf() function: 779 // "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF". 
780 static const char *infinity_str(bool positive, char fmt_spec, int force_sign, 781 int space_for_positive) 782 { 783 static const char *table[] = { 784 "-inf", "inf", "+inf", " inf", 785 "-INF", "INF", "+INF", " INF" 786 }; 787 int idx = positive * (1 + force_sign + force_sign * space_for_positive); 788 if (ASCII_ISUPPER(fmt_spec)) { 789 idx += 4; 790 } 791 return table[idx]; 792 } 793 794 /// Like vim_snprintf() except the return value can be safely used to increment a 795 /// buffer length. 796 /// Normal `snprintf()` (and `vim_snprintf()`) returns the number of bytes that 797 /// would have been copied if the destination buffer was large enough. 798 /// This means that you cannot rely on it's return value for the destination 799 /// length because the destination may be shorter than the source. This function 800 /// guarantees the returned length will never be greater than the destination length. 801 size_t vim_snprintf_safelen(char *str, size_t str_m, const char *fmt, ...) 802 { 803 va_list ap; 804 int str_l; 805 806 va_start(ap, fmt); 807 str_l = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL); 808 va_end(ap); 809 810 if (str_l < 0) { 811 *str = NUL; 812 return 0; 813 } 814 return ((size_t)str_l >= str_m) ? 
str_m - 1 : (size_t)str_l; 815 } 816 817 int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap) 818 { 819 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL); 820 } 821 822 enum { 823 TYPE_UNKNOWN = -1, 824 TYPE_INT, 825 TYPE_LONGINT, 826 TYPE_LONGLONGINT, 827 TYPE_SIGNEDSIZET, 828 TYPE_UNSIGNEDINT, 829 TYPE_UNSIGNEDLONGINT, 830 TYPE_UNSIGNEDLONGLONGINT, 831 TYPE_SIZET, 832 TYPE_POINTER, 833 TYPE_PERCENT, 834 TYPE_CHAR, 835 TYPE_STRING, 836 TYPE_FLOAT, 837 }; 838 839 /// Types that can be used in a format string 840 static int format_typeof(const char *type) 841 FUNC_ATTR_NONNULL_ALL 842 { 843 // allowed values: \0, h, l, L 844 char length_modifier = NUL; 845 846 // current conversion specifier character 847 char fmt_spec = NUL; 848 849 // parse 'h', 'l', 'll' and 'z' length modifiers 850 if (*type == 'h' || *type == 'l' || *type == 'z') { 851 length_modifier = *type; 852 type++; 853 if (length_modifier == 'l' && *type == 'l') { 854 // double l = long long 855 length_modifier = 'L'; 856 type++; 857 } 858 } 859 fmt_spec = *type; 860 861 // common synonyms: 862 switch (fmt_spec) { 863 case 'i': 864 fmt_spec = 'd'; break; 865 case '*': 866 fmt_spec = 'd'; length_modifier = 'h'; break; 867 case 'D': 868 fmt_spec = 'd'; length_modifier = 'l'; break; 869 case 'U': 870 fmt_spec = 'u'; length_modifier = 'l'; break; 871 case 'O': 872 fmt_spec = 'o'; length_modifier = 'l'; break; 873 default: 874 break; 875 } 876 877 // get parameter value, do initial processing 878 switch (fmt_spec) { 879 // '%' and 'c' behave similar to 's' regarding flags and field 880 // widths 881 case '%': 882 return TYPE_PERCENT; 883 884 case 'c': 885 return TYPE_CHAR; 886 887 case 's': 888 case 'S': 889 return TYPE_STRING; 890 891 case 'd': 892 case 'u': 893 case 'b': 894 case 'B': 895 case 'o': 896 case 'x': 897 case 'X': 898 case 'p': 899 // NOTE: the u, b, o, x, X and p conversion specifiers 900 // imply the value is unsigned; d implies a signed 901 // value 902 903 // 0 if 
numeric argument is zero (or if pointer is 904 // NULL for 'p'), +1 if greater than zero (or nonzero 905 // for unsigned arguments), -1 if negative (unsigned 906 // argument is never negative) 907 908 if (fmt_spec == 'p') { 909 return TYPE_POINTER; 910 } else if (fmt_spec == 'b' || fmt_spec == 'B') { 911 return TYPE_UNSIGNEDLONGLONGINT; 912 } else if (fmt_spec == 'd') { 913 // signed 914 switch (length_modifier) { 915 case NUL: 916 case 'h': 917 // char and short arguments are passed as int. 918 return TYPE_INT; 919 case 'l': 920 return TYPE_LONGINT; 921 case 'L': 922 return TYPE_LONGLONGINT; 923 case 'z': 924 return TYPE_SIGNEDSIZET; 925 } 926 } else { 927 // unsigned 928 switch (length_modifier) { 929 case NUL: 930 case 'h': 931 return TYPE_UNSIGNEDINT; 932 case 'l': 933 return TYPE_UNSIGNEDLONGINT; 934 case 'L': 935 return TYPE_UNSIGNEDLONGLONGINT; 936 case 'z': 937 return TYPE_SIZET; 938 } 939 } 940 break; 941 942 case 'f': 943 case 'F': 944 case 'e': 945 case 'E': 946 case 'g': 947 case 'G': 948 return TYPE_FLOAT; 949 } 950 951 return TYPE_UNKNOWN; 952 } 953 954 static char *format_typename(const char *type) 955 FUNC_ATTR_NONNULL_ALL 956 { 957 switch (format_typeof(type)) { 958 case TYPE_INT: 959 return _(typename_int); 960 case TYPE_LONGINT: 961 return _(typename_longint); 962 case TYPE_LONGLONGINT: 963 return _(typename_longlongint); 964 case TYPE_UNSIGNEDINT: 965 return _(typename_unsignedint); 966 case TYPE_SIGNEDSIZET: 967 return _(typename_signedsizet); 968 case TYPE_UNSIGNEDLONGINT: 969 return _(typename_unsignedlongint); 970 case TYPE_UNSIGNEDLONGLONGINT: 971 return _(typename_unsignedlonglongint); 972 case TYPE_SIZET: 973 return _(typename_sizet); 974 case TYPE_POINTER: 975 return _(typename_pointer); 976 case TYPE_PERCENT: 977 return _(typename_percent); 978 case TYPE_CHAR: 979 return _(typename_char); 980 case TYPE_STRING: 981 return _(typename_string); 982 case TYPE_FLOAT: 983 return _(typename_float); 984 } 985 986 return _(typename_unknown); 987 
} 988 989 static int adjust_types(const char ***ap_types, int arg, int *num_posarg, const char *type) 990 FUNC_ATTR_NONNULL_ALL 991 { 992 if (arg <= 0) { 993 semsg(_(e_invalid_format_specifier_str), type); 994 return FAIL; 995 } 996 997 if (*ap_types == NULL || *num_posarg < arg) { 998 const char **new_types = *ap_types == NULL 999 ? xcalloc((size_t)arg, sizeof(const char *)) 1000 : xrealloc(*ap_types, (size_t)arg * sizeof(const char *)); 1001 1002 for (int idx = *num_posarg; idx < arg; idx++) { 1003 new_types[idx] = NULL; 1004 } 1005 1006 *ap_types = new_types; 1007 *num_posarg = arg; 1008 } 1009 1010 if ((*ap_types)[arg - 1] != NULL) { 1011 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*') { 1012 const char *pt = type; 1013 if (pt[0] == '*') { 1014 pt = (*ap_types)[arg - 1]; 1015 } 1016 1017 if (pt[0] != '*') { 1018 switch (pt[0]) { 1019 case 'd': 1020 case 'i': 1021 break; 1022 default: 1023 semsg(_(e_positional_num_field_spec_reused_str_str), arg, 1024 format_typename((*ap_types)[arg - 1]), format_typename(type)); 1025 return FAIL; 1026 } 1027 } 1028 } else { 1029 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1])) { 1030 semsg(_(e_positional_arg_num_type_inconsistent_str_str), arg, 1031 format_typename(type), format_typename((*ap_types)[arg - 1])); 1032 return FAIL; 1033 } 1034 } 1035 } 1036 1037 (*ap_types)[arg - 1] = type; 1038 1039 return OK; 1040 } 1041 1042 static void format_overflow_error(const char *pstart) 1043 { 1044 const char *p = pstart; 1045 1046 while (ascii_isdigit((int)(*p))) { 1047 p++; 1048 } 1049 1050 size_t arglen = (size_t)(p - pstart); 1051 char *argcopy = xstrnsave(pstart, arglen); 1052 semsg(_(e_val_too_large), argcopy); 1053 xfree(argcopy); 1054 } 1055 1056 enum { MAX_ALLOWED_STRING_WIDTH = 1048576, }; // 1MiB 1057 1058 static int get_unsigned_int(const char *pstart, const char **p, unsigned *uj, bool overflow_err) 1059 { 1060 *uj = (unsigned)(**p - '0'); 1061 (*p)++; 1062 1063 while (ascii_isdigit((int)(**p)) && *uj < 
MAX_ALLOWED_STRING_WIDTH) { 1064 *uj = 10 * *uj + (unsigned)(**p - '0'); 1065 (*p)++; 1066 } 1067 1068 if (*uj > MAX_ALLOWED_STRING_WIDTH) { 1069 if (overflow_err) { 1070 format_overflow_error(pstart); 1071 return FAIL; 1072 } else { 1073 *uj = MAX_ALLOWED_STRING_WIDTH; 1074 } 1075 } 1076 1077 return OK; 1078 } 1079 1080 static int parse_fmt_types(const char ***ap_types, int *num_posarg, const char *fmt, typval_T *tvs) 1081 FUNC_ATTR_NONNULL_ARG(1, 2) 1082 { 1083 const char *p = fmt; 1084 const char *arg = NULL; 1085 1086 int any_pos = 0; 1087 int any_arg = 0; 1088 1089 #define CHECK_POS_ARG \ 1090 do { \ 1091 if (any_pos && any_arg) { \ 1092 semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); \ 1093 goto error; \ 1094 } \ 1095 } while (0); 1096 1097 if (p == NULL) { 1098 return OK; 1099 } 1100 1101 while (*p != NUL) { 1102 if (*p != '%') { 1103 size_t n = (size_t)(xstrchrnul(p + 1, '%') - p); 1104 p += n; 1105 } else { 1106 // allowed values: \0, h, l, L 1107 char length_modifier = NUL; 1108 1109 // variable for positional arg 1110 int pos_arg = -1; 1111 const char *pstart = p + 1; 1112 1113 p++; // skip '%' 1114 1115 // First check to see if we find a positional 1116 // argument specifier 1117 const char *ptype = p; 1118 1119 while (ascii_isdigit(*ptype)) { 1120 ptype++; 1121 } 1122 1123 if (*ptype == '$') { 1124 if (*p == '0') { 1125 // 0 flag at the wrong place 1126 semsg(_(e_invalid_format_specifier_str), fmt); 1127 goto error; 1128 } 1129 1130 // Positional argument 1131 unsigned uj; 1132 1133 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL) { 1134 goto error; 1135 } 1136 1137 pos_arg = (int)uj; 1138 1139 any_pos = 1; 1140 CHECK_POS_ARG; 1141 1142 p++; 1143 } 1144 1145 // parse flags 1146 while (*p == '0' || *p == '-' || *p == '+' || *p == ' ' 1147 || *p == '#' || *p == '\'') { 1148 switch (*p) { 1149 case '0': 1150 break; 1151 case '-': 1152 break; 1153 case '+': 1154 break; 1155 case ' ': // If both the ' ' and '+' flags appear, the ' 
' 1156 // flag should be ignored 1157 break; 1158 case '#': 1159 break; 1160 case '\'': 1161 break; 1162 } 1163 p++; 1164 } 1165 // If the '0' and '-' flags both appear, the '0' flag should be 1166 // ignored. 1167 1168 // parse field width 1169 if (*(arg = p) == '*') { 1170 p++; 1171 1172 if (ascii_isdigit((int)(*p))) { 1173 // Positional argument field width 1174 unsigned uj; 1175 1176 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL) { 1177 goto error; 1178 } 1179 1180 if (*p != '$') { 1181 semsg(_(e_invalid_format_specifier_str), fmt); 1182 goto error; 1183 } else { 1184 p++; 1185 any_pos = 1; 1186 CHECK_POS_ARG; 1187 1188 if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) { 1189 goto error; 1190 } 1191 } 1192 } else { 1193 any_arg = 1; 1194 CHECK_POS_ARG; 1195 } 1196 } else if (ascii_isdigit((int)(*p))) { 1197 // size_t could be wider than unsigned int; make sure we treat 1198 // argument like common implementations do 1199 const char *digstart = p; 1200 unsigned uj; 1201 1202 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1203 goto error; 1204 } 1205 1206 if (*p == '$') { 1207 semsg(_(e_invalid_format_specifier_str), fmt); 1208 goto error; 1209 } 1210 } 1211 1212 // parse precision 1213 if (*p == '.') { 1214 p++; 1215 1216 if (*(arg = p) == '*') { 1217 p++; 1218 1219 if (ascii_isdigit((int)(*p))) { 1220 // Parse precision 1221 unsigned uj; 1222 1223 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL) { 1224 goto error; 1225 } 1226 1227 if (*p == '$') { 1228 any_pos = 1; 1229 CHECK_POS_ARG; 1230 1231 p++; 1232 1233 if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) { 1234 goto error; 1235 } 1236 } else { 1237 semsg(_(e_invalid_format_specifier_str), fmt); 1238 goto error; 1239 } 1240 } else { 1241 any_arg = 1; 1242 CHECK_POS_ARG; 1243 } 1244 } else if (ascii_isdigit((int)(*p))) { 1245 // size_t could be wider than unsigned int; make sure we 1246 // treat argument like common implementations do 1247 
const char *digstart = p; 1248 unsigned uj; 1249 1250 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1251 goto error; 1252 } 1253 1254 if (*p == '$') { 1255 semsg(_(e_invalid_format_specifier_str), fmt); 1256 goto error; 1257 } 1258 } 1259 } 1260 1261 if (pos_arg != -1) { 1262 any_pos = 1; 1263 CHECK_POS_ARG; 1264 1265 ptype = p; 1266 } 1267 1268 // parse 'h', 'l', 'll' and 'z' length modifiers 1269 if (*p == 'h' || *p == 'l' || *p == 'z') { 1270 length_modifier = *p; 1271 p++; 1272 if (length_modifier == 'l' && *p == 'l') { 1273 // double l = long long 1274 // length_modifier = 'L'; 1275 p++; 1276 } 1277 } 1278 1279 switch (*p) { 1280 // Check for known format specifiers. % is special! 1281 case 'i': 1282 case '*': 1283 case 'd': 1284 case 'u': 1285 case 'o': 1286 case 'D': 1287 case 'U': 1288 case 'O': 1289 case 'x': 1290 case 'X': 1291 case 'b': 1292 case 'B': 1293 case 'c': 1294 case 's': 1295 case 'S': 1296 case 'p': 1297 case 'f': 1298 case 'F': 1299 case 'e': 1300 case 'E': 1301 case 'g': 1302 case 'G': 1303 if (pos_arg != -1) { 1304 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL) { 1305 goto error; 1306 } 1307 } else { 1308 any_arg = 1; 1309 CHECK_POS_ARG; 1310 } 1311 break; 1312 1313 default: 1314 if (pos_arg != -1) { 1315 semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); 1316 goto error; 1317 } 1318 } 1319 1320 if (*p != NUL) { 1321 p++; // step over the just processed conversion specifier 1322 } 1323 } 1324 } 1325 1326 for (int arg_idx = 0; arg_idx < *num_posarg; arg_idx++) { 1327 if ((*ap_types)[arg_idx] == NULL) { 1328 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt); 1329 goto error; 1330 } 1331 1332 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN) { 1333 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt); 1334 goto error; 1335 } 1336 } 1337 1338 return OK; 1339 1340 error: 1341 xfree(*ap_types); 1342 *ap_types = NULL; 1343 *num_posarg = 0; 1344 return FAIL; 1345 } 1346 1347 static void 
skip_to_arg(const char **ap_types, va_list ap_start, va_list *ap, int *arg_idx, 1348 int *arg_cur, const char *fmt) 1349 FUNC_ATTR_NONNULL_ARG(3, 4, 5) 1350 { 1351 int arg_min = 0; 1352 1353 if (*arg_cur + 1 == *arg_idx) { 1354 (*arg_cur)++; 1355 (*arg_idx)++; 1356 return; 1357 } 1358 1359 if (*arg_cur >= *arg_idx) { 1360 // Reset ap to ap_start and skip arg_idx - 1 types 1361 va_end(*ap); 1362 va_copy(*ap, ap_start); 1363 } else { 1364 // Skip over any we should skip 1365 arg_min = *arg_cur; 1366 } 1367 1368 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; (*arg_cur)++) { 1369 if (ap_types == NULL || ap_types[*arg_cur] == NULL) { 1370 siemsg(e_aptypes_is_null_nr_str, fmt, *arg_cur); 1371 return; 1372 } 1373 1374 const char *p = ap_types[*arg_cur]; 1375 1376 int fmt_type = format_typeof(p); 1377 1378 // get parameter value, do initial processing 1379 switch (fmt_type) { 1380 case TYPE_PERCENT: 1381 case TYPE_UNKNOWN: 1382 break; 1383 1384 case TYPE_CHAR: 1385 va_arg(*ap, int); 1386 break; 1387 1388 case TYPE_STRING: 1389 va_arg(*ap, const char *); 1390 break; 1391 1392 case TYPE_POINTER: 1393 va_arg(*ap, void *); 1394 break; 1395 1396 case TYPE_INT: 1397 va_arg(*ap, int); 1398 break; 1399 1400 case TYPE_LONGINT: 1401 va_arg(*ap, long); 1402 break; 1403 1404 case TYPE_LONGLONGINT: 1405 va_arg(*ap, long long); // NOLINT(runtime/int) 1406 break; 1407 1408 case TYPE_SIGNEDSIZET: // implementation-defined, usually ptrdiff_t 1409 va_arg(*ap, ptrdiff_t); 1410 break; 1411 1412 case TYPE_UNSIGNEDINT: 1413 va_arg(*ap, unsigned); 1414 break; 1415 1416 case TYPE_UNSIGNEDLONGINT: 1417 va_arg(*ap, unsigned long); 1418 break; 1419 1420 case TYPE_UNSIGNEDLONGLONGINT: 1421 va_arg(*ap, unsigned long long); // NOLINT(runtime/int) 1422 break; 1423 1424 case TYPE_SIZET: 1425 va_arg(*ap, size_t); 1426 break; 1427 1428 case TYPE_FLOAT: 1429 va_arg(*ap, double); 1430 break; 1431 } 1432 } 1433 1434 // Because we know that after we return from this call, 1435 // a va_arg() call is made, we 
can pre-emptively 1436 // increment the current argument index. 1437 (*arg_cur)++; 1438 (*arg_idx)++; 1439 } 1440 1441 /// Write formatted value to the string 1442 /// 1443 /// @param[out] str String to write to. 1444 /// @param[in] str_m String length. 1445 /// @param[in] fmt String format. 1446 /// @param[in] ap Values that should be formatted. Ignored if tvs is not NULL. 1447 /// @param[in] tvs Values that should be formatted, for printf() Vimscript 1448 /// function. Must be NULL in other cases. 1449 /// 1450 /// @return Number of bytes excluding NUL byte that would be written to the 1451 /// string if str_m was greater or equal to the return value. 1452 int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap_start, 1453 typval_T *const tvs) 1454 { 1455 size_t str_l = 0; 1456 bool str_avail = str_l < str_m; 1457 const char *p = fmt; 1458 int arg_cur = 0; 1459 int num_posarg = 0; 1460 int arg_idx = 1; 1461 va_list ap; 1462 const char **ap_types = NULL; 1463 1464 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL) { 1465 return 0; 1466 } 1467 1468 va_copy(ap, ap_start); 1469 1470 if (!p) { 1471 p = ""; 1472 } 1473 while (*p) { 1474 if (*p != '%') { 1475 // copy up to the next '%' or NUL without any changes 1476 size_t n = (size_t)(xstrchrnul(p + 1, '%') - p); 1477 if (str_avail) { 1478 size_t avail = str_m - str_l; 1479 memmove(str + str_l, p, MIN(n, avail)); 1480 str_avail = n < avail; 1481 } 1482 p += n; 1483 assert(n <= SIZE_MAX - str_l); 1484 str_l += n; 1485 } else { 1486 size_t min_field_width = 0; 1487 size_t precision = 0; 1488 bool zero_padding = false; 1489 bool precision_specified = false; 1490 bool justify_left = false; 1491 bool alternate_form = false; 1492 bool force_sign = false; 1493 1494 // if both ' ' and '+' flags appear, ' ' flag should be ignored 1495 int space_for_positive = 1; 1496 1497 // allowed values: \0, h, l, 2 (for ll), z, L 1498 char length_modifier = NUL; 1499 1500 // temporary buffer for simple 
numeric->string conversion 1501 #define TMP_LEN 350 // 1e308 seems reasonable as the maximum printable 1502 char tmp[TMP_LEN]; 1503 1504 // string address in case of string argument 1505 const char *str_arg = NULL; 1506 1507 // natural field width of arg without padding and sign 1508 size_t str_arg_l; 1509 1510 // unsigned char argument value (only defined for c conversion); 1511 // standard explicitly states the char argument for the c 1512 // conversion is unsigned 1513 unsigned char uchar_arg; 1514 1515 // number of zeros to be inserted for numeric conversions as 1516 // required by the precision or minimal field width 1517 size_t number_of_zeros_to_pad = 0; 1518 1519 // index into tmp where zero padding is to be inserted 1520 size_t zero_padding_insertion_ind = 0; 1521 1522 // current conversion specifier character 1523 char fmt_spec = NUL; 1524 1525 // buffer for 's' and 'S' specs 1526 char *tofree = NULL; 1527 1528 // variable for positional arg 1529 int pos_arg = -1; 1530 1531 p++; // skip '%' 1532 1533 // First check to see if we find a positional 1534 // argument specifier 1535 const char *ptype = p; 1536 1537 while (ascii_isdigit(*ptype)) { 1538 ptype++; 1539 } 1540 1541 if (*ptype == '$') { 1542 // Positional argument 1543 const char *digstart = p; 1544 unsigned uj; 1545 1546 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1547 goto error; 1548 } 1549 1550 pos_arg = (int)uj; 1551 1552 p++; 1553 } 1554 1555 // parse flags 1556 while (true) { 1557 switch (*p) { 1558 case '0': 1559 zero_padding = true; p++; continue; 1560 case '-': 1561 justify_left = true; p++; continue; 1562 // if both '0' and '-' flags appear, '0' should be ignored 1563 case '+': 1564 force_sign = true; space_for_positive = 0; p++; continue; 1565 case ' ': 1566 force_sign = true; p++; continue; 1567 // if both ' ' and '+' flags appear, ' ' should be ignored 1568 case '#': 1569 alternate_form = true; p++; continue; 1570 case '\'': 1571 p++; continue; 1572 default: 1573 
break; 1574 } 1575 break; 1576 } 1577 1578 // parse field width 1579 if (*p == '*') { 1580 const char *digstart = p + 1; 1581 1582 p++; 1583 1584 if (ascii_isdigit((int)(*p))) { 1585 // Positional argument field width 1586 unsigned uj; 1587 1588 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1589 goto error; 1590 } 1591 1592 arg_idx = (int)uj; 1593 1594 p++; 1595 } 1596 1597 int j = (tvs 1598 ? (int)tv_nr(tvs, &arg_idx) 1599 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1600 &arg_cur, fmt), 1601 va_arg(ap, int))); 1602 1603 if (j > MAX_ALLOWED_STRING_WIDTH) { 1604 if (tvs != NULL) { 1605 format_overflow_error(digstart); 1606 goto error; 1607 } else { 1608 j = MAX_ALLOWED_STRING_WIDTH; 1609 } 1610 } 1611 1612 if (j >= 0) { 1613 min_field_width = (size_t)j; 1614 } else { 1615 min_field_width = (size_t)-j; 1616 justify_left = true; 1617 } 1618 } else if (ascii_isdigit((int)(*p))) { 1619 // size_t could be wider than unsigned int; make sure we treat 1620 // argument like common implementations do 1621 const char *digstart = p; 1622 unsigned uj; 1623 1624 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1625 goto error; 1626 } 1627 1628 min_field_width = uj; 1629 } 1630 1631 // parse precision 1632 if (*p == '.') { 1633 p++; 1634 precision_specified = true; 1635 1636 if (ascii_isdigit((int)(*p))) { 1637 // size_t could be wider than unsigned int; make sure we 1638 // treat argument like common implementations do 1639 const char *digstart = p; 1640 unsigned uj; 1641 1642 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1643 goto error; 1644 } 1645 1646 precision = uj; 1647 } else if (*p == '*') { 1648 const char *digstart = p; 1649 1650 p++; 1651 1652 if (ascii_isdigit((int)(*p))) { 1653 // positional argument 1654 unsigned uj; 1655 1656 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) { 1657 goto error; 1658 } 1659 1660 arg_idx = (int)uj; 1661 1662 p++; 1663 } 1664 1665 int j = (tvs 1666 ? 
(int)tv_nr(tvs, &arg_idx) 1667 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1668 &arg_cur, fmt), 1669 va_arg(ap, int))); 1670 1671 if (j > MAX_ALLOWED_STRING_WIDTH) { 1672 if (tvs != NULL) { 1673 format_overflow_error(digstart); 1674 goto error; 1675 } else { 1676 j = MAX_ALLOWED_STRING_WIDTH; 1677 } 1678 } 1679 1680 if (j >= 0) { 1681 precision = (size_t)j; 1682 } else { 1683 precision_specified = false; 1684 precision = 0; 1685 } 1686 } 1687 } 1688 1689 // parse 'h', 'l', 'll' and 'z' length modifiers 1690 if (*p == 'h' || *p == 'l' || *p == 'z') { 1691 length_modifier = *p; 1692 p++; 1693 if (length_modifier == 'l' && *p == 'l') { 1694 // double l = long long 1695 length_modifier = 'L'; 1696 p++; 1697 } 1698 } 1699 1700 fmt_spec = *p; 1701 1702 // common synonyms 1703 switch (fmt_spec) { 1704 case 'i': 1705 fmt_spec = 'd'; break; 1706 case 'D': 1707 fmt_spec = 'd'; length_modifier = 'l'; break; 1708 case 'U': 1709 fmt_spec = 'u'; length_modifier = 'l'; break; 1710 case 'O': 1711 fmt_spec = 'o'; length_modifier = 'l'; break; 1712 default: 1713 break; 1714 } 1715 1716 switch (fmt_spec) { 1717 case 'd': 1718 case 'u': 1719 case 'o': 1720 case 'x': 1721 case 'X': 1722 if (tvs && length_modifier == NUL) { 1723 length_modifier = 'L'; 1724 } 1725 } 1726 1727 if (pos_arg != -1) { 1728 arg_idx = pos_arg; 1729 } 1730 1731 // get parameter value, do initial processing 1732 switch (fmt_spec) { 1733 // '%' and 'c' behave similar to 's' regarding flags and field widths 1734 case '%': 1735 case 'c': 1736 case 's': 1737 case 'S': 1738 str_arg_l = 1; 1739 switch (fmt_spec) { 1740 case '%': 1741 str_arg = p; 1742 break; 1743 1744 case 'c': { 1745 const int j = (tvs 1746 ? 
(int)tv_nr(tvs, &arg_idx) 1747 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1748 &arg_cur, fmt), 1749 va_arg(ap, int))); 1750 1751 // standard demands unsigned char 1752 uchar_arg = (unsigned char)j; 1753 str_arg = (char *)&uchar_arg; 1754 break; 1755 } 1756 1757 case 's': 1758 case 'S': 1759 str_arg = (tvs 1760 ? tv_str(tvs, &arg_idx, &tofree) 1761 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1762 &arg_cur, fmt), 1763 va_arg(ap, const char *))); 1764 1765 if (!str_arg) { 1766 str_arg = "[NULL]"; 1767 str_arg_l = 6; 1768 } else if (!precision_specified) { 1769 // make sure not to address string beyond the specified 1770 // precision 1771 str_arg_l = strlen(str_arg); 1772 } else if (precision == 0) { 1773 // truncate string if necessary as requested by precision 1774 str_arg_l = 0; 1775 } else { 1776 // memchr on HP does not like n > 2^31 1777 // TODO(elmart): check if this still holds / is relevant 1778 str_arg_l = (size_t)((char *)xmemscan(str_arg, 1779 NUL, 1780 MIN(precision, 1781 0x7fffffff)) 1782 - str_arg); 1783 } 1784 if (fmt_spec == 'S') { 1785 const char *p1; 1786 size_t i; 1787 1788 for (i = 0, p1 = str_arg; *p1; p1 += utfc_ptr2len(p1)) { 1789 size_t cell = (size_t)utf_ptr2cells(p1); 1790 if (precision_specified && i + cell > precision) { 1791 break; 1792 } 1793 i += cell; 1794 } 1795 1796 str_arg_l = (size_t)(p1 - str_arg); 1797 if (min_field_width != 0) { 1798 min_field_width += str_arg_l - i; 1799 } 1800 } 1801 break; 1802 1803 default: 1804 break; 1805 } 1806 break; 1807 1808 case 'd': 1809 case 'u': 1810 case 'b': 1811 case 'B': 1812 case 'o': 1813 case 'x': 1814 case 'X': 1815 case 'p': { 1816 // u, b, B, o, x, X and p conversion specifiers imply 1817 // the value is unsigned; d implies a signed value 1818 1819 // 0 if numeric argument is zero (or if pointer is NULL for 'p'), 1820 // +1 if greater than zero (or non NULL for 'p'), 1821 // -1 if negative (unsigned argument is never negative) 1822 int arg_sign = 0; 1823 1824 intmax_t arg = 0; 
1825 uintmax_t uarg = 0; 1826 1827 // only defined for p conversion 1828 const void *ptr_arg = NULL; 1829 1830 if (fmt_spec == 'p') { 1831 ptr_arg = (tvs 1832 ? tv_ptr(tvs, &arg_idx) 1833 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1834 &arg_cur, fmt), 1835 va_arg(ap, void *))); 1836 1837 if (ptr_arg) { 1838 arg_sign = 1; 1839 } 1840 } else if (fmt_spec == 'b' || fmt_spec == 'B') { 1841 uarg = (tvs 1842 ? (unsigned long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int) 1843 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1844 &arg_cur, fmt), 1845 va_arg(ap, unsigned long long))); // NOLINT(runtime/int) 1846 arg_sign = (uarg != 0); 1847 } else if (fmt_spec == 'd') { 1848 // signed 1849 switch (length_modifier) { 1850 case NUL: 1851 arg = (tvs 1852 ? (int)tv_nr(tvs, &arg_idx) 1853 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1854 &arg_cur, fmt), 1855 va_arg(ap, int))); 1856 break; 1857 case 'h': 1858 // char and short arguments are passed as int16_t 1859 arg = (int16_t) 1860 (tvs 1861 ? (int)tv_nr(tvs, &arg_idx) 1862 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1863 &arg_cur, fmt), 1864 va_arg(ap, int))); 1865 break; 1866 case 'l': 1867 arg = (tvs 1868 ? (long)tv_nr(tvs, &arg_idx) 1869 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1870 &arg_cur, fmt), 1871 va_arg(ap, long))); 1872 break; 1873 case 'L': 1874 arg = (tvs 1875 ? (long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int) 1876 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1877 &arg_cur, fmt), 1878 va_arg(ap, long long))); // NOLINT(runtime/int) 1879 break; 1880 case 'z': // implementation-defined, usually ptrdiff_t 1881 arg = (tvs 1882 ? (ptrdiff_t)tv_nr(tvs, &arg_idx) 1883 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1884 &arg_cur, fmt), 1885 va_arg(ap, ptrdiff_t))); 1886 break; 1887 } 1888 if (arg > 0) { 1889 arg_sign = 1; 1890 } else if (arg < 0) { 1891 arg_sign = -1; 1892 } 1893 } else { 1894 // unsigned 1895 switch (length_modifier) { 1896 case NUL: 1897 uarg = (tvs 1898 ? 
(unsigned)tv_nr(tvs, &arg_idx) 1899 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1900 &arg_cur, fmt), 1901 va_arg(ap, unsigned))); 1902 break; 1903 case 'h': 1904 uarg = (uint16_t) 1905 (tvs 1906 ? (unsigned)tv_nr(tvs, &arg_idx) 1907 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1908 &arg_cur, fmt), 1909 va_arg(ap, unsigned))); 1910 break; 1911 case 'l': 1912 uarg = (tvs 1913 ? (unsigned long)tv_nr(tvs, &arg_idx) 1914 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1915 &arg_cur, fmt), 1916 va_arg(ap, unsigned long))); 1917 break; 1918 case 'L': 1919 uarg = (tvs 1920 ? (unsigned long long)tv_nr(tvs, &arg_idx) // NOLINT(runtime/int) 1921 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1922 &arg_cur, fmt), 1923 va_arg(ap, unsigned long long))); // NOLINT(runtime/int) 1924 break; 1925 case 'z': 1926 uarg = (tvs 1927 ? (size_t)tv_nr(tvs, &arg_idx) 1928 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 1929 &arg_cur, fmt), 1930 va_arg(ap, size_t))); 1931 break; 1932 } 1933 arg_sign = (uarg != 0); 1934 } 1935 1936 str_arg = tmp; 1937 str_arg_l = 0; 1938 1939 // For d, i, u, o, x, and X conversions, if precision is specified, 1940 // '0' flag should be ignored. This is so with Solaris 2.6, Digital 1941 // UNIX 4.0, HPUX 10, Linux, FreeBSD, NetBSD; but not with Perl. 1942 if (precision_specified) { 1943 zero_padding = false; 1944 } 1945 1946 if (fmt_spec == 'd') { 1947 if (force_sign && arg_sign >= 0) { 1948 tmp[str_arg_l++] = space_for_positive ? ' ' : '+'; 1949 } 1950 // leave negative numbers for snprintf to handle, to 1951 // avoid handling tricky cases like (short int)-32768 1952 } else if (alternate_form) { 1953 if (arg_sign != 0 && (fmt_spec == 'x' || fmt_spec == 'X' 1954 || fmt_spec == 'b' || fmt_spec == 'B')) { 1955 tmp[str_arg_l++] = '0'; 1956 tmp[str_arg_l++] = fmt_spec; 1957 } 1958 // alternate form should have no effect for p * conversion, but ... 
1959 } 1960 1961 zero_padding_insertion_ind = str_arg_l; 1962 if (!precision_specified) { 1963 precision = 1; // default precision is 1 1964 } 1965 if (precision == 0 && arg_sign == 0) { 1966 // when zero value is formatted with an explicit precision 0, 1967 // resulting formatted string is empty (d, i, u, b, B, o, x, X, p) 1968 } else { 1969 switch (fmt_spec) { 1970 case 'p': // pointer 1971 str_arg_l += (size_t)snprintf(tmp + str_arg_l, 1972 sizeof(tmp) - str_arg_l, 1973 "%p", ptr_arg); 1974 break; 1975 case 'd': // signed 1976 str_arg_l += (size_t)snprintf(tmp + str_arg_l, 1977 sizeof(tmp) - str_arg_l, 1978 "%" PRIdMAX, arg); 1979 break; 1980 case 'b': 1981 case 'B': { // binary 1982 size_t bits = 0; 1983 for (bits = sizeof(uintmax_t) * 8; bits > 0; bits--) { 1984 if ((uarg >> (bits - 1)) & 0x1) { 1985 break; 1986 } 1987 } 1988 1989 while (bits > 0) { 1990 tmp[str_arg_l++] = ((uarg >> --bits) & 0x1) ? '1' : '0'; 1991 } 1992 break; 1993 } 1994 default: { // unsigned 1995 // construct a simple format string for snprintf 1996 char f[] = "%" PRIuMAX; 1997 f[sizeof("%" PRIuMAX) - 1 - 1] = fmt_spec; 1998 assert(PRIuMAX[sizeof(PRIuMAX) - 1 - 1] == 'u'); 1999 str_arg_l += (size_t)snprintf(tmp + str_arg_l, 2000 sizeof(tmp) - str_arg_l, 2001 f, uarg); 2002 break; 2003 } 2004 } 2005 assert(str_arg_l < sizeof(tmp)); 2006 2007 // include the optional minus sign and possible "0x" in the region 2008 // before the zero padding insertion point 2009 if (zero_padding_insertion_ind < str_arg_l 2010 && tmp[zero_padding_insertion_ind] == '-') { 2011 zero_padding_insertion_ind++; 2012 } 2013 if (zero_padding_insertion_ind + 1 < str_arg_l 2014 && tmp[zero_padding_insertion_ind] == '0' 2015 && (tmp[zero_padding_insertion_ind + 1] == 'x' 2016 || tmp[zero_padding_insertion_ind + 1] == 'X' 2017 || tmp[zero_padding_insertion_ind + 1] == 'b' 2018 || tmp[zero_padding_insertion_ind + 1] == 'B')) { 2019 zero_padding_insertion_ind += 2; 2020 } 2021 } 2022 2023 { 2024 size_t num_of_digits = 
str_arg_l - zero_padding_insertion_ind; 2025 2026 if (alternate_form && fmt_spec == 'o' 2027 // unless zero is already the first character 2028 && !(zero_padding_insertion_ind < str_arg_l 2029 && tmp[zero_padding_insertion_ind] == '0')) { 2030 // assure leading zero for alternate-form octal numbers 2031 if (!precision_specified || precision < num_of_digits + 1) { 2032 // precision is increased to force the first character to be 2033 // zero, except if a zero value is formatted with an explicit 2034 // precision of zero 2035 precision = num_of_digits + 1; 2036 } 2037 } 2038 // zero padding to specified precision? 2039 if (num_of_digits < precision) { 2040 number_of_zeros_to_pad = precision - num_of_digits; 2041 } 2042 } 2043 // zero padding to specified minimal field width? 2044 if (!justify_left && zero_padding) { 2045 const int n = (int)(min_field_width - (str_arg_l 2046 + number_of_zeros_to_pad)); 2047 if (n > 0) { 2048 number_of_zeros_to_pad += (size_t)n; 2049 } 2050 } 2051 break; 2052 } 2053 2054 case 'f': 2055 case 'F': 2056 case 'e': 2057 case 'E': 2058 case 'g': 2059 case 'G': { 2060 // floating point 2061 char format[40]; 2062 bool remove_trailing_zeroes = false; 2063 2064 double f = (tvs 2065 ? tv_float(tvs, &arg_idx) 2066 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, 2067 &arg_cur, fmt), 2068 va_arg(ap, double))); 2069 2070 double abs_f = f < 0 ? -f : f; 2071 2072 if (fmt_spec == 'g' || fmt_spec == 'G') { 2073 // can't use %g directly, cause it prints "1.0" as "1" 2074 if ((abs_f >= 0.001 && abs_f < 10000000.0) || abs_f == 0.0) { 2075 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f'; 2076 } else { 2077 fmt_spec = fmt_spec == 'g' ? 
'e' : 'E'; 2078 } 2079 remove_trailing_zeroes = true; 2080 } 2081 2082 if (xisinf(f) 2083 || (strchr("fF", fmt_spec) != NULL && abs_f > 1.0e307)) { 2084 xstrlcpy(tmp, infinity_str(f > 0.0, fmt_spec, 2085 force_sign, space_for_positive), 2086 sizeof(tmp)); 2087 str_arg_l = strlen(tmp); 2088 zero_padding = false; 2089 } else if (xisnan(f)) { 2090 // Not a number: nan or NAN 2091 memmove(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN" : "nan", 4); 2092 str_arg_l = 3; 2093 zero_padding = false; 2094 } else { 2095 // Regular float number 2096 format[0] = '%'; 2097 size_t l = 1; 2098 if (force_sign) { 2099 format[l++] = space_for_positive ? ' ' : '+'; 2100 } 2101 if (precision_specified) { 2102 size_t max_prec = TMP_LEN - 10; 2103 2104 // make sure we don't get more digits than we have room for 2105 if ((fmt_spec == 'f' || fmt_spec == 'F') && abs_f > 1.0) { 2106 max_prec -= (size_t)log10(abs_f); 2107 } 2108 if (precision > max_prec) { 2109 precision = max_prec; 2110 } 2111 l += (size_t)snprintf(format + l, sizeof(format) - l, ".%d", 2112 (int)precision); 2113 } 2114 2115 // Cast to char to avoid a conversion warning on Ubuntu 12.04. 2116 assert(l + 1 < sizeof(format)); 2117 format[l] = (char)(fmt_spec == 'F' ? 'f' : fmt_spec); 2118 format[l + 1] = NUL; 2119 2120 str_arg_l = (size_t)snprintf(tmp, sizeof(tmp), format, f); 2121 assert(str_arg_l < sizeof(tmp)); 2122 2123 if (remove_trailing_zeroes) { 2124 char *tp; 2125 2126 // using %g or %G: remove superfluous zeroes 2127 if (fmt_spec == 'f' || fmt_spec == 'F') { 2128 tp = tmp + str_arg_l - 1; 2129 } else { 2130 tp = vim_strchr(tmp, fmt_spec == 'e' ? 'e' : 'E'); 2131 if (tp) { 2132 // remove superfluous '+' and leading zeroes from exponent 2133 if (tp[1] == '+') { 2134 // change "1.0e+07" to "1.0e07" 2135 STRMOVE(tp + 1, tp + 2); 2136 str_arg_l--; 2137 } 2138 int i = (tp[1] == '-') ? 
2 : 1; 2139 while (tp[i] == '0') { 2140 // change "1.0e07" to "1.0e7" 2141 STRMOVE(tp + i, tp + i + 1); 2142 str_arg_l--; 2143 } 2144 tp--; 2145 } 2146 } 2147 2148 if (tp != NULL && !precision_specified) { 2149 // remove trailing zeroes, but keep the one just after a dot 2150 while (tp > tmp + 2 && *tp == '0' && tp[-1] != '.') { 2151 STRMOVE(tp, tp + 1); 2152 tp--; 2153 str_arg_l--; 2154 } 2155 } 2156 } else { 2157 // Be consistent: some printf("%e") use 1.0e+12 and some 2158 // 1.0e+012; remove one zero in the last case. 2159 char *tp = vim_strchr(tmp, fmt_spec == 'e' ? 'e' : 'E'); 2160 if (tp && (tp[1] == '+' || tp[1] == '-') && tp[2] == '0' 2161 && ascii_isdigit(tp[3]) && ascii_isdigit(tp[4])) { 2162 STRMOVE(tp + 2, tp + 3); 2163 str_arg_l--; 2164 } 2165 } 2166 } 2167 if (zero_padding && min_field_width > str_arg_l 2168 && (tmp[0] == '-' || force_sign)) { 2169 // Padding 0's should be inserted after the sign. 2170 number_of_zeros_to_pad = min_field_width - str_arg_l; 2171 zero_padding_insertion_ind = 1; 2172 } 2173 str_arg = tmp; 2174 break; 2175 } 2176 2177 default: 2178 // unrecognized conversion specifier, keep format string as-is 2179 zero_padding = false; // turn zero padding off for non-numeric conversion 2180 justify_left = true; 2181 min_field_width = 0; // reset flags 2182 2183 // discard the unrecognized conversion, just keep 2184 // the unrecognized conversion character 2185 str_arg = p; 2186 str_arg_l = 0; 2187 if (*p) { 2188 str_arg_l++; // include invalid conversion specifier 2189 } 2190 // unchanged if not at end-of-string 2191 break; 2192 } 2193 2194 if (*p) { 2195 p++; // step over the just processed conversion specifier 2196 } 2197 2198 // insert padding to the left as requested by min_field_width; 2199 // this does not include the zero padding in case of numerical conversions 2200 if (!justify_left) { 2201 assert(str_arg_l <= SIZE_MAX - number_of_zeros_to_pad); 2202 if (min_field_width > str_arg_l + number_of_zeros_to_pad) { 2203 // left 
padding with blank or zero 2204 size_t pn = min_field_width - (str_arg_l + number_of_zeros_to_pad); 2205 if (str_avail) { 2206 size_t avail = str_m - str_l; 2207 memset(str + str_l, zero_padding ? '0' : ' ', MIN(pn, avail)); 2208 str_avail = pn < avail; 2209 } 2210 assert(pn <= SIZE_MAX - str_l); 2211 str_l += pn; 2212 } 2213 } 2214 2215 // zero padding as requested by the precision or by the minimal 2216 // field width for numeric conversions required? 2217 if (number_of_zeros_to_pad == 0) { 2218 // will not copy first part of numeric right now, 2219 // force it to be copied later in its entirety 2220 zero_padding_insertion_ind = 0; 2221 } else { 2222 // insert first part of numerics (sign or '0x') before zero padding 2223 if (zero_padding_insertion_ind > 0) { 2224 size_t zn = zero_padding_insertion_ind; 2225 if (str_avail) { 2226 size_t avail = str_m - str_l; 2227 memmove(str + str_l, str_arg, MIN(zn, avail)); 2228 str_avail = zn < avail; 2229 } 2230 assert(zn <= SIZE_MAX - str_l); 2231 str_l += zn; 2232 } 2233 2234 // insert zero padding as requested by precision or min field width 2235 size_t zn = number_of_zeros_to_pad; 2236 if (str_avail) { 2237 size_t avail = str_m - str_l; 2238 memset(str + str_l, '0', MIN(zn, avail)); 2239 str_avail = zn < avail; 2240 } 2241 assert(zn <= SIZE_MAX - str_l); 2242 str_l += zn; 2243 } 2244 2245 // insert formatted string 2246 // (or as-is conversion specifier for unknown conversions) 2247 if (str_arg_l > zero_padding_insertion_ind) { 2248 size_t sn = str_arg_l - zero_padding_insertion_ind; 2249 if (str_avail) { 2250 size_t avail = str_m - str_l; 2251 memmove(str + str_l, 2252 str_arg + zero_padding_insertion_ind, 2253 MIN(sn, avail)); 2254 str_avail = sn < avail; 2255 } 2256 assert(sn <= SIZE_MAX - str_l); 2257 str_l += sn; 2258 } 2259 2260 // insert right padding 2261 if (justify_left) { 2262 assert(str_arg_l <= SIZE_MAX - number_of_zeros_to_pad); 2263 if (min_field_width > str_arg_l + number_of_zeros_to_pad) { 2264 // right 
// blank padding to the field width
          size_t pn = min_field_width - (str_arg_l + number_of_zeros_to_pad);
          if (str_avail) {
            size_t avail = str_m - str_l;
            memset(str + str_l, ' ', MIN(pn, avail));
            str_avail = pn < avail;
          }
          assert(pn <= SIZE_MAX - str_l);
          str_l += pn;
        }
      }

      xfree(tofree);
    }
  }

  if (str_m > 0) {
    // make sure the string is nul-terminated even at the expense of
    // overwriting the last character (shouldn't happen, but just in case)
    str[str_l <= str_m - 1 ? str_l : str_m - 1] = NUL;
  }

  if (tvs != NULL
      && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN) {
    emsg(_("E767: Too many arguments to printf()"));
  }

error:
  xfree(ap_types);
  va_end(ap);

  // return the number of characters formatted (excluding trailing nul
  // character); that is, the number of characters that would have been
  // written to the buffer if it were large enough.
  return (int)str_l;
}

/// Append printf-style formatted text to a StringBuilder.
///
/// The text is first formatted into the unused tail of the builder
/// (capacity - size). When it does not fit, the builder is grown and the text
/// is formatted a second time.
///
/// @param[in,out]  str  Builder to append to; its "size" is advanced by the
///                      number of bytes printed (the trailing NUL is written
///                      but not counted).
/// @param[in]      fmt  printf-style format string, followed by its arguments.
///
/// @return the number of bytes appended, or -1 when vsnprintf() reports an
///         error (in which case "size" is left unchanged).
int kv_do_printf(StringBuilder *str, const char *fmt, ...)
  FUNC_ATTR_PRINTF(2, 3)
{
  size_t remaining = str->capacity - str->size;

  va_list ap;
  va_start(ap, fmt);
  // With no storage yet a NULL buffer is passed; together with remaining this
  // makes the first call a pure length measurement.
  // NOTE(review): assumes capacity is 0 whenever items is NULL -- confirm.
  int printed = vsnprintf(str->items ? str->items + str->size : NULL, remaining, fmt, ap);
  va_end(ap);

  if (printed < 0) {
    return -1;
  }

  // printed string didn't fit, resize and try again
  if ((size_t)printed >= remaining) {
    kv_ensure_space(*str, (size_t)printed + 1);  // include space for NUL terminator at the end
    assert(str->items != NULL);
    // A va_list cannot be reused after va_end(), so start it over.
    va_start(ap, fmt);
    printed = vsnprintf(str->items + str->size, str->capacity - str->size, fmt, ap);
    va_end(ap);
    if (printed < 0) {
      return -1;
    }
  }

  str->size += (size_t)printed;
  return printed;
}

/// Format a string printf-style into memory obtained from an arena.
///
/// @param[in,out]  arena  Arena to take the memory from. May be NULL; then the
///                        first formatting pass only measures the length and
///                        the buffer comes from arena_alloc(NULL, ...).
/// @param[in]      fmt    printf-style format string, followed by its arguments.
///
/// @return the formatted text as a String whose size excludes the trailing
///         NUL, or STRING_INIT when vsnprintf() reports an error.
String arena_printf(Arena *arena, const char *fmt, ...)
  FUNC_ATTR_PRINTF(2, 3)
{
  size_t remaining = 0;
  char *buf = NULL;
  if (arena) {
    if (!arena->cur_blk) {
      arena_alloc_block(arena);
    }

    // happy case, we can fit the printed string in the rest of the current
    // block (one pass):
    remaining = arena->size - arena->pos;
    buf = arena->cur_blk + arena->pos;
  }

  va_list ap;
  va_start(ap, fmt);
  int printed = vsnprintf(buf, remaining, fmt, ap);
  va_end(ap);

  if (printed < 0) {
    return (String)STRING_INIT;
  }

  // printed string didn't fit, allocate and try again
  // (always taken when arena is NULL, because remaining is 0 then, so the
  // else branch below never dereferences a NULL arena)
  if ((size_t)printed >= remaining) {
    buf = arena_alloc(arena, (size_t)printed + 1, false);
    va_start(ap, fmt);
    printed = vsnprintf(buf, (size_t)printed + 1, fmt, ap);
    va_end(ap);
    if (printed < 0) {
      return (String)STRING_INIT;
    }
  } else {
    // The text was written in place: claim those bytes plus the NUL.
    arena->pos += (size_t)printed + 1;
  }

  return cbuf_as_string(buf, (size_t)printed);
}

/// Reverse text into allocated memory.
///
/// The string is reversed in units of utfc_ptr2len() bytes, so the bytes
/// inside each unit keep their order.
///
/// @param[in]  s  NUL-terminated string to reverse.
///
/// @return the allocated string.
char *reverse_text(char *s)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET
{
  size_t len = strlen(s);
  char *rev = xmalloc(len + 1);
  // rev_i walks backwards from the end of "rev" while s_i walks forwards
  // through "s".
  for (size_t s_i = 0, rev_i = len; s_i < len; s_i++) {
    const int mb_len = utfc_ptr2len(s + s_i);
    rev_i -= (size_t)mb_len;
    memmove(rev + rev_i, s + s_i, (size_t)mb_len);
    // The loop header adds the remaining 1.
    s_i += (size_t)mb_len - 1;
  }
  rev[len] = NUL;
  return rev;
}

/// Replace all occurrences of "what" with "rep" in "src". If no replacement happens then NULL is
/// returned otherwise return a newly allocated string.
///
/// @param[in]  src  Source text
/// @param[in]  what Substring to replace
/// @param[in]  rep  Substring to replace with
///
/// @return [allocated] Copy of the string.
2398 char *strrep(const char *src, const char *what, const char *rep) 2399 { 2400 const char *pos = src; 2401 size_t whatlen = strlen(what); 2402 2403 // Count occurrences 2404 size_t count = 0; 2405 while ((pos = strstr(pos, what)) != NULL) { 2406 count++; 2407 pos += whatlen; 2408 } 2409 2410 if (count == 0) { 2411 return NULL; 2412 } 2413 2414 size_t replen = strlen(rep); 2415 char *ret = xmalloc(strlen(src) + count * (replen - whatlen) + 1); 2416 char *ptr = ret; 2417 while ((pos = strstr(src, what)) != NULL) { 2418 size_t idx = (size_t)(pos - src); 2419 memcpy(ptr, src, idx); 2420 ptr += idx; 2421 STRCPY(ptr, rep); 2422 ptr += replen; 2423 src = pos + whatlen; 2424 } 2425 2426 // Copy remaining 2427 STRCPY(ptr, src); 2428 2429 return ret; 2430 } 2431 2432 /// Implementation of "byteidx()" and "byteidxcomp()" functions 2433 static void byteidx_common(typval_T *argvars, typval_T *rettv, bool comp) 2434 { 2435 rettv->vval.v_number = -1; 2436 2437 const char *const str = tv_get_string_chk(&argvars[0]); 2438 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); 2439 if (str == NULL || idx < 0) { 2440 return; 2441 } 2442 2443 varnumber_T utf16idx = false; 2444 if (argvars[2].v_type != VAR_UNKNOWN) { 2445 bool error = false; 2446 utf16idx = tv_get_bool_chk(&argvars[2], &error); 2447 if (error) { 2448 return; 2449 } 2450 if (utf16idx < 0 || utf16idx > 1) { 2451 semsg(_(e_using_number_as_bool_nr), utf16idx); 2452 return; 2453 } 2454 } 2455 2456 int (*ptr2len)(const char *); 2457 if (comp) { 2458 ptr2len = utf_ptr2len; 2459 } else { 2460 ptr2len = utfc_ptr2len; 2461 } 2462 2463 const char *t = str; 2464 for (; idx > 0; idx--) { 2465 if (*t == NUL) { // EOL reached. 2466 return; 2467 } 2468 if (utf16idx) { 2469 const int clen = ptr2len(t); 2470 const int c = (clen > 1) ? 
utf_ptr2char(t) : *t; 2471 if (c > 0xFFFF) { 2472 idx--; 2473 } 2474 } 2475 if (idx > 0) { 2476 t += ptr2len(t); 2477 } 2478 } 2479 rettv->vval.v_number = (varnumber_T)(t - str); 2480 } 2481 2482 /// "byteidx()" function 2483 void f_byteidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2484 { 2485 byteidx_common(argvars, rettv, false); 2486 } 2487 2488 /// "byteidxcomp()" function 2489 void f_byteidxcomp(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2490 { 2491 byteidx_common(argvars, rettv, true); 2492 } 2493 2494 /// "charidx()" function 2495 void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2496 { 2497 rettv->vval.v_number = -1; 2498 2499 if (tv_check_for_string_arg(argvars, 0) == FAIL 2500 || tv_check_for_number_arg(argvars, 1) == FAIL 2501 || tv_check_for_opt_bool_arg(argvars, 2) == FAIL 2502 || (argvars[2].v_type != VAR_UNKNOWN 2503 && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) { 2504 return; 2505 } 2506 2507 const char *const str = tv_get_string_chk(&argvars[0]); 2508 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); 2509 if (str == NULL || idx < 0) { 2510 return; 2511 } 2512 2513 varnumber_T countcc = false; 2514 varnumber_T utf16idx = false; 2515 if (argvars[2].v_type != VAR_UNKNOWN) { 2516 countcc = tv_get_bool(&argvars[2]); 2517 if (argvars[3].v_type != VAR_UNKNOWN) { 2518 utf16idx = tv_get_bool(&argvars[3]); 2519 } 2520 } 2521 2522 int (*ptr2len)(const char *); 2523 if (countcc) { 2524 ptr2len = utf_ptr2len; 2525 } else { 2526 ptr2len = utfc_ptr2len; 2527 } 2528 2529 const char *p; 2530 int len; 2531 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) { 2532 if (*p == NUL) { 2533 // If the index is exactly the number of bytes or utf-16 code units 2534 // in the string then return the length of the string in characters. 2535 if (utf16idx ? 
(idx == 0) : (p == (str + idx))) { 2536 rettv->vval.v_number = len; 2537 } 2538 return; 2539 } 2540 if (utf16idx) { 2541 idx--; 2542 const int clen = ptr2len(p); 2543 const int c = (clen > 1) ? utf_ptr2char(p) : *p; 2544 if (c > 0xFFFF) { 2545 idx--; 2546 } 2547 } 2548 p += ptr2len(p); 2549 } 2550 2551 rettv->vval.v_number = len > 0 ? len - 1 : 0; 2552 } 2553 2554 /// "str2list()" function 2555 void f_str2list(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2556 { 2557 tv_list_alloc_ret(rettv, kListLenUnknown); 2558 const char *p = tv_get_string(&argvars[0]); 2559 2560 for (; *p != NUL; p += utf_ptr2len(p)) { 2561 tv_list_append_number(rettv->vval.v_list, utf_ptr2char(p)); 2562 } 2563 } 2564 2565 /// "str2nr()" function 2566 void f_str2nr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2567 { 2568 int base = 10; 2569 int what = 0; 2570 2571 if (argvars[1].v_type != VAR_UNKNOWN) { 2572 base = (int)tv_get_number(&argvars[1]); 2573 if (base != 2 && base != 8 && base != 10 && base != 16) { 2574 emsg(_(e_invarg)); 2575 return; 2576 } 2577 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) { 2578 what |= STR2NR_QUOTE; 2579 } 2580 } 2581 2582 char *p = skipwhite(tv_get_string(&argvars[0])); 2583 bool isneg = (*p == '-'); 2584 if (*p == '+' || *p == '-') { 2585 p = skipwhite(p + 1); 2586 } 2587 switch (base) { 2588 case 2: 2589 what |= STR2NR_BIN | STR2NR_FORCE; 2590 break; 2591 case 8: 2592 what |= STR2NR_OCT | STR2NR_OOCT | STR2NR_FORCE; 2593 break; 2594 case 16: 2595 what |= STR2NR_HEX | STR2NR_FORCE; 2596 break; 2597 } 2598 varnumber_T n; 2599 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, false, NULL); 2600 // Text after the number is silently ignored. 
2601 if (isneg) { 2602 rettv->vval.v_number = -n; 2603 } else { 2604 rettv->vval.v_number = n; 2605 } 2606 } 2607 2608 /// "strgetchar()" function 2609 void f_strgetchar(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2610 { 2611 rettv->vval.v_number = -1; 2612 2613 const char *const str = tv_get_string_chk(&argvars[0]); 2614 if (str == NULL) { 2615 return; 2616 } 2617 bool error = false; 2618 varnumber_T charidx = tv_get_number_chk(&argvars[1], &error); 2619 if (error) { 2620 return; 2621 } 2622 2623 const size_t len = strlen(str); 2624 size_t byteidx = 0; 2625 2626 while (charidx >= 0 && byteidx < len) { 2627 if (charidx == 0) { 2628 rettv->vval.v_number = utf_ptr2char(str + byteidx); 2629 break; 2630 } 2631 charidx--; 2632 byteidx += (size_t)utf_ptr2len(str + byteidx); 2633 } 2634 } 2635 2636 /// "stridx()" function 2637 void f_stridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2638 { 2639 rettv->vval.v_number = -1; 2640 2641 char buf[NUMBUFLEN]; 2642 const char *const needle = tv_get_string_chk(&argvars[1]); 2643 const char *haystack = tv_get_string_buf_chk(&argvars[0], buf); 2644 const char *const haystack_start = haystack; 2645 if (needle == NULL || haystack == NULL) { 2646 return; // Type error; errmsg already given. 
2647 } 2648 2649 if (argvars[2].v_type != VAR_UNKNOWN) { 2650 bool error = false; 2651 2652 const ptrdiff_t start_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], 2653 &error); 2654 if (error || start_idx >= (ptrdiff_t)strlen(haystack)) { 2655 return; 2656 } 2657 if (start_idx >= 0) { 2658 haystack += start_idx; 2659 } 2660 } 2661 2662 const char *pos = strstr(haystack, needle); 2663 if (pos != NULL) { 2664 rettv->vval.v_number = (varnumber_T)(pos - haystack_start); 2665 } 2666 } 2667 2668 /// "string()" function 2669 void f_string(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2670 { 2671 rettv->v_type = VAR_STRING; 2672 rettv->vval.v_string = encode_tv2string(&argvars[0], NULL); 2673 } 2674 2675 /// "strlen()" function 2676 void f_strlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2677 { 2678 rettv->vval.v_number = (varnumber_T)strlen(tv_get_string(&argvars[0])); 2679 } 2680 2681 static void strchar_common(typval_T *argvars, typval_T *rettv, bool skipcc) 2682 { 2683 const char *s = tv_get_string(&argvars[0]); 2684 varnumber_T len = 0; 2685 int (*func_mb_ptr2char_adv)(const char **pp); 2686 2687 func_mb_ptr2char_adv = skipcc ? 
mb_ptr2char_adv : mb_cptr2char_adv; 2688 while (*s != NUL) { 2689 func_mb_ptr2char_adv(&s); 2690 len++; 2691 } 2692 rettv->vval.v_number = len; 2693 } 2694 2695 /// "strcharlen()" function 2696 void f_strcharlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2697 { 2698 strchar_common(argvars, rettv, true); 2699 } 2700 2701 /// "strchars()" function 2702 void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2703 { 2704 varnumber_T skipcc = false; 2705 2706 if (argvars[1].v_type != VAR_UNKNOWN) { 2707 bool error = false; 2708 skipcc = tv_get_bool_chk(&argvars[1], &error); 2709 if (error) { 2710 return; 2711 } 2712 if (skipcc < 0 || skipcc > 1) { 2713 semsg(_(e_using_number_as_bool_nr), skipcc); 2714 return; 2715 } 2716 } 2717 2718 strchar_common(argvars, rettv, skipcc); 2719 } 2720 2721 /// "strutf16len()" function 2722 void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2723 { 2724 rettv->vval.v_number = -1; 2725 2726 if (tv_check_for_string_arg(argvars, 0) == FAIL 2727 || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) { 2728 return; 2729 } 2730 2731 varnumber_T countcc = false; 2732 if (argvars[1].v_type != VAR_UNKNOWN) { 2733 countcc = tv_get_bool(&argvars[1]); 2734 } 2735 2736 const char *s = tv_get_string(&argvars[0]); 2737 varnumber_T len = 0; 2738 int (*func_mb_ptr2char_adv)(const char **pp); 2739 2740 func_mb_ptr2char_adv = countcc ? 
mb_cptr2char_adv : mb_ptr2char_adv; 2741 while (*s != NUL) { 2742 const int ch = func_mb_ptr2char_adv(&s); 2743 if (ch > 0xFFFF) { 2744 len++; 2745 } 2746 len++; 2747 } 2748 rettv->vval.v_number = len; 2749 } 2750 2751 /// "strdisplaywidth()" function 2752 void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2753 { 2754 const char *const s = tv_get_string(&argvars[0]); 2755 int col = 0; 2756 2757 if (argvars[1].v_type != VAR_UNKNOWN) { 2758 col = (int)tv_get_number(&argvars[1]); 2759 } 2760 2761 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, (char *)s) - col); 2762 } 2763 2764 /// "strwidth()" function 2765 void f_strwidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2766 { 2767 const char *const s = tv_get_string(&argvars[0]); 2768 2769 rettv->vval.v_number = (varnumber_T)mb_string2cells(s); 2770 } 2771 2772 /// "strcharpart()" function 2773 void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2774 { 2775 const char *const p = tv_get_string(&argvars[0]); 2776 const size_t slen = strlen(p); 2777 2778 int nbyte = 0; 2779 varnumber_T skipcc = false; 2780 bool error = false; 2781 varnumber_T nchar = tv_get_number_chk(&argvars[1], &error); 2782 if (!error) { 2783 if (argvars[2].v_type != VAR_UNKNOWN 2784 && argvars[3].v_type != VAR_UNKNOWN) { 2785 skipcc = tv_get_bool_chk(&argvars[3], &error); 2786 if (error) { 2787 return; 2788 } 2789 if (skipcc < 0 || skipcc > 1) { 2790 semsg(_(e_using_number_as_bool_nr), skipcc); 2791 return; 2792 } 2793 } 2794 2795 if (nchar > 0) { 2796 while (nchar > 0 && (size_t)nbyte < slen) { 2797 if (skipcc) { 2798 nbyte += utfc_ptr2len(p + nbyte); 2799 } else { 2800 nbyte += utf_ptr2len(p + nbyte); 2801 } 2802 nchar--; 2803 } 2804 } else { 2805 nbyte = (int)nchar; 2806 } 2807 } 2808 int len = 0; 2809 if (argvars[2].v_type != VAR_UNKNOWN) { 2810 int charlen = (int)tv_get_number(&argvars[2]); 2811 while (charlen > 0 && nbyte + len < (int)slen) { 2812 int off = nbyte + len; 2813 
2814 if (off < 0) { 2815 len += 1; 2816 } else { 2817 if (skipcc) { 2818 len += utfc_ptr2len(p + off); 2819 } else { 2820 len += utf_ptr2len(p + off); 2821 } 2822 } 2823 charlen--; 2824 } 2825 } else { 2826 len = (int)slen - nbyte; // default: all bytes that are available. 2827 } 2828 2829 // Only return the overlap between the specified part and the actual 2830 // string. 2831 if (nbyte < 0) { 2832 len += nbyte; 2833 nbyte = 0; 2834 } else if ((size_t)nbyte > slen) { 2835 nbyte = (int)slen; 2836 } 2837 if (len < 0) { 2838 len = 0; 2839 } else if (nbyte + len > (int)slen) { 2840 len = (int)slen - nbyte; 2841 } 2842 2843 rettv->v_type = VAR_STRING; 2844 rettv->vval.v_string = xmemdupz(p + nbyte, (size_t)len); 2845 } 2846 2847 /// "strpart()" function 2848 void f_strpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2849 { 2850 bool error = false; 2851 2852 const char *const p = tv_get_string(&argvars[0]); 2853 const size_t slen = strlen(p); 2854 2855 varnumber_T n = tv_get_number_chk(&argvars[1], &error); 2856 varnumber_T len; 2857 if (error) { 2858 len = 0; 2859 } else if (argvars[2].v_type != VAR_UNKNOWN) { 2860 len = tv_get_number(&argvars[2]); 2861 } else { 2862 len = (varnumber_T)slen - n; // Default len: all bytes that are available. 2863 } 2864 2865 // Only return the overlap between the specified part and the actual 2866 // string. 
2867 if (n < 0) { 2868 len += n; 2869 n = 0; 2870 } else if (n > (varnumber_T)slen) { 2871 n = (varnumber_T)slen; 2872 } 2873 if (len < 0) { 2874 len = 0; 2875 } else if (n + len > (varnumber_T)slen) { 2876 len = (varnumber_T)slen - n; 2877 } 2878 2879 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) { 2880 int64_t off; 2881 2882 // length in characters 2883 for (off = n; off < (int64_t)slen && len > 0; len--) { 2884 off += utfc_ptr2len(p + off); 2885 } 2886 len = off - n; 2887 } 2888 2889 rettv->v_type = VAR_STRING; 2890 rettv->vval.v_string = xmemdupz(p + n, (size_t)len); 2891 } 2892 2893 /// "strridx()" function 2894 void f_strridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2895 { 2896 char buf[NUMBUFLEN]; 2897 const char *const needle = tv_get_string_chk(&argvars[1]); 2898 const char *const haystack = tv_get_string_buf_chk(&argvars[0], buf); 2899 2900 rettv->vval.v_number = -1; 2901 if (needle == NULL || haystack == NULL) { 2902 return; // Type error; errmsg already given. 2903 } 2904 2905 const size_t haystack_len = strlen(haystack); 2906 ptrdiff_t end_idx; 2907 if (argvars[2].v_type != VAR_UNKNOWN) { 2908 // Third argument: upper limit for index. 2909 end_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], NULL); 2910 if (end_idx < 0) { 2911 return; // Can never find a match. 2912 } 2913 } else { 2914 end_idx = (ptrdiff_t)haystack_len; 2915 } 2916 2917 const char *lastmatch = NULL; 2918 if (*needle == NUL) { 2919 // Empty string matches past the end. 
2920 lastmatch = haystack + end_idx; 2921 } else { 2922 for (const char *rest = haystack; *rest != NUL; rest++) { 2923 rest = strstr(rest, needle); 2924 if (rest == NULL || rest > haystack + end_idx) { 2925 break; 2926 } 2927 lastmatch = rest; 2928 } 2929 } 2930 2931 if (lastmatch != NULL) { 2932 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack); 2933 } 2934 } 2935 2936 /// "strtrans()" function 2937 void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2938 { 2939 rettv->v_type = VAR_STRING; 2940 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true); 2941 } 2942 2943 /// "utf16idx()" function 2944 /// 2945 /// Converts a byte or character offset in a string to the corresponding UTF-16 2946 /// code unit offset. 2947 void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 2948 { 2949 rettv->vval.v_number = -1; 2950 2951 if (tv_check_for_string_arg(argvars, 0) == FAIL 2952 || tv_check_for_opt_number_arg(argvars, 1) == FAIL 2953 || tv_check_for_opt_bool_arg(argvars, 2) == FAIL 2954 || (argvars[2].v_type != VAR_UNKNOWN 2955 && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) { 2956 return; 2957 } 2958 2959 const char *const str = tv_get_string_chk(&argvars[0]); 2960 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL); 2961 if (str == NULL || idx < 0) { 2962 return; 2963 } 2964 2965 varnumber_T countcc = false; 2966 varnumber_T charidx = false; 2967 if (argvars[2].v_type != VAR_UNKNOWN) { 2968 countcc = tv_get_bool(&argvars[2]); 2969 if (argvars[3].v_type != VAR_UNKNOWN) { 2970 charidx = tv_get_bool(&argvars[3]); 2971 } 2972 } 2973 2974 int (*ptr2len)(const char *); 2975 if (countcc) { 2976 ptr2len = utf_ptr2len; 2977 } else { 2978 ptr2len = utfc_ptr2len; 2979 } 2980 2981 const char *p; 2982 int len; 2983 int utf16idx = 0; 2984 for (p = str, len = 0; charidx ? 
idx >= 0 : p <= str + idx; len++) { 2985 if (*p == NUL) { 2986 // If the index is exactly the number of bytes or characters in the 2987 // string then return the length of the string in utf-16 code units. 2988 if (charidx ? (idx == 0) : (p == (str + idx))) { 2989 rettv->vval.v_number = len; 2990 } 2991 return; 2992 } 2993 utf16idx = len; 2994 const int clen = ptr2len(p); 2995 const int c = (clen > 1) ? utf_ptr2char(p) : *p; 2996 if (c > 0xFFFF) { 2997 len++; 2998 } 2999 p += ptr2len(p); 3000 if (charidx) { 3001 idx--; 3002 } 3003 } 3004 3005 rettv->vval.v_number = utf16idx; 3006 } 3007 3008 /// "tolower(string)" function 3009 void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 3010 { 3011 rettv->v_type = VAR_STRING; 3012 rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), false); 3013 } 3014 3015 /// "toupper(string)" function 3016 void f_toupper(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 3017 { 3018 rettv->v_type = VAR_STRING; 3019 rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), true); 3020 } 3021 3022 /// "tr(string, fromstr, tostr)" function 3023 void f_tr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 3024 { 3025 char buf[NUMBUFLEN]; 3026 char buf2[NUMBUFLEN]; 3027 3028 const char *in_str = tv_get_string(&argvars[0]); 3029 const char *fromstr = tv_get_string_buf_chk(&argvars[1], buf); 3030 const char *tostr = tv_get_string_buf_chk(&argvars[2], buf2); 3031 3032 // Default return value: empty string. 3033 rettv->v_type = VAR_STRING; 3034 rettv->vval.v_string = NULL; 3035 if (fromstr == NULL || tostr == NULL) { 3036 return; // Type error; errmsg already given. 3037 } 3038 garray_T ga; 3039 ga_init(&ga, (int)sizeof(char), 80); 3040 3041 // fromstr and tostr have to contain the same number of chars. 
3042 bool first = true; 3043 while (*in_str != NUL) { 3044 const char *cpstr = in_str; 3045 const int inlen = utfc_ptr2len(in_str); 3046 int cplen = inlen; 3047 int idx = 0; 3048 int fromlen; 3049 for (const char *p = fromstr; *p != NUL; p += fromlen) { 3050 fromlen = utfc_ptr2len(p); 3051 if (fromlen == inlen && strncmp(in_str, p, (size_t)inlen) == 0) { 3052 int tolen; 3053 for (p = tostr; *p != NUL; p += tolen) { 3054 tolen = utfc_ptr2len(p); 3055 if (idx-- == 0) { 3056 cplen = tolen; 3057 cpstr = p; 3058 break; 3059 } 3060 } 3061 if (*p == NUL) { // tostr is shorter than fromstr. 3062 goto error; 3063 } 3064 break; 3065 } 3066 idx++; 3067 } 3068 3069 if (first && cpstr == in_str) { 3070 // Check that fromstr and tostr have the same number of 3071 // (multi-byte) characters. Done only once when a character 3072 // of in_str doesn't appear in fromstr. 3073 first = false; 3074 int tolen; 3075 for (const char *p = tostr; *p != NUL; p += tolen) { 3076 tolen = utfc_ptr2len(p); 3077 idx--; 3078 } 3079 if (idx != 0) { 3080 goto error; 3081 } 3082 } 3083 3084 ga_grow(&ga, cplen); 3085 memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); 3086 ga.ga_len += cplen; 3087 3088 in_str += inlen; 3089 } 3090 3091 // add a terminating NUL 3092 ga_append(&ga, NUL); 3093 3094 rettv->vval.v_string = ga.ga_data; 3095 return; 3096 error: 3097 semsg(_(e_invarg2), fromstr); 3098 ga_clear(&ga); 3099 } 3100 3101 /// "trim({expr})" function 3102 void f_trim(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) 3103 { 3104 char buf1[NUMBUFLEN]; 3105 char buf2[NUMBUFLEN]; 3106 const char *head = tv_get_string_buf_chk(&argvars[0], buf1); 3107 const char *mask = NULL; 3108 const char *prev; 3109 const char *p; 3110 int dir = 0; 3111 3112 rettv->v_type = VAR_STRING; 3113 rettv->vval.v_string = NULL; 3114 if (head == NULL) { 3115 return; 3116 } 3117 3118 if (tv_check_for_opt_string_arg(argvars, 1) == FAIL) { 3119 return; 3120 } 3121 3122 if (argvars[1].v_type == VAR_STRING) { 3123 mask 
= tv_get_string_buf_chk(&argvars[1], buf2); 3124 if (*mask == NUL) { 3125 mask = NULL; 3126 } 3127 3128 if (argvars[2].v_type != VAR_UNKNOWN) { 3129 bool error = false; 3130 // leading or trailing characters to trim 3131 dir = (int)tv_get_number_chk(&argvars[2], &error); 3132 if (error) { 3133 return; 3134 } 3135 if (dir < 0 || dir > 2) { 3136 semsg(_(e_invarg2), tv_get_string(&argvars[2])); 3137 return; 3138 } 3139 } 3140 } 3141 3142 if (dir == 0 || dir == 1) { 3143 // Trim leading characters 3144 while (*head != NUL) { 3145 int c1 = utf_ptr2char(head); 3146 if (mask == NULL) { 3147 if (c1 > ' ' && c1 != 0xa0) { 3148 break; 3149 } 3150 } else { 3151 for (p = mask; *p != NUL; MB_PTR_ADV(p)) { 3152 if (c1 == utf_ptr2char(p)) { 3153 break; 3154 } 3155 } 3156 if (*p == NUL) { 3157 break; 3158 } 3159 } 3160 MB_PTR_ADV(head); 3161 } 3162 } 3163 3164 const char *tail = head + strlen(head); 3165 if (dir == 0 || dir == 2) { 3166 // Trim trailing characters 3167 for (; tail > head; tail = prev) { 3168 prev = tail; 3169 MB_PTR_BACK(head, prev); 3170 int c1 = utf_ptr2char(prev); 3171 if (mask == NULL) { 3172 if (c1 > ' ' && c1 != 0xa0) { 3173 break; 3174 } 3175 } else { 3176 for (p = mask; *p != NUL; MB_PTR_ADV(p)) { 3177 if (c1 == utf_ptr2char(p)) { 3178 break; 3179 } 3180 } 3181 if (*p == NUL) { 3182 break; 3183 } 3184 } 3185 } 3186 } 3187 rettv->vval.v_string = xstrnsave(head, (size_t)(tail - head)); 3188 } 3189 3190 /// compare two keyvalue_T structs by case sensitive value 3191 int cmp_keyvalue_value(const void *a, const void *b) 3192 { 3193 keyvalue_T *kv1 = (keyvalue_T *)a; 3194 keyvalue_T *kv2 = (keyvalue_T *)b; 3195 3196 return strcmp(kv1->value, kv2->value); 3197 } 3198 3199 /// compare two keyvalue_T structs by value with length 3200 int cmp_keyvalue_value_n(const void *a, const void *b) 3201 { 3202 keyvalue_T *kv1 = (keyvalue_T *)a; 3203 keyvalue_T *kv2 = (keyvalue_T *)b; 3204 3205 return strncmp(kv1->value, kv2->value, MAX(kv1->length, kv2->length)); 3206 } 3207 
3208 /// compare two keyvalue_T structs by case insensitive value 3209 int cmp_keyvalue_value_i(const void *a, const void *b) 3210 { 3211 keyvalue_T *kv1 = (keyvalue_T *)a; 3212 keyvalue_T *kv2 = (keyvalue_T *)b; 3213 3214 return STRICMP(kv1->value, kv2->value); 3215 } 3216 3217 /// compare two keyvalue_T structs by case insensitive value with length 3218 int cmp_keyvalue_value_ni(const void *a, const void *b) 3219 { 3220 keyvalue_T *kv1 = (keyvalue_T *)a; 3221 keyvalue_T *kv2 = (keyvalue_T *)b; 3222 3223 return STRNICMP(kv1->value, kv2->value, MAX(kv1->length, kv2->length)); 3224 }