neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

strings.c (91265B)


      1 #include <assert.h>
      2 #include <inttypes.h>
      3 #include <math.h>
      4 #include <stdarg.h>
      5 #include <stdbool.h>
      6 #include <stddef.h>
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <string.h>
     10 
     11 #include "auto/config.h"
     12 #include "nvim/api/private/defs.h"
     13 #include "nvim/api/private/helpers.h"
     14 #include "nvim/ascii_defs.h"
     15 #include "nvim/assert_defs.h"
     16 #include "nvim/charset.h"
     17 #include "nvim/errors.h"
     18 #include "nvim/eval/encode.h"
     19 #include "nvim/eval/typval.h"
     20 #include "nvim/eval/typval_defs.h"
     21 #include "nvim/ex_docmd.h"
     22 #include "nvim/garray.h"
     23 #include "nvim/garray_defs.h"
     24 #include "nvim/gettext_defs.h"
     25 #include "nvim/macros_defs.h"
     26 #include "nvim/math.h"
     27 #include "nvim/mbyte.h"
     28 #include "nvim/mbyte_defs.h"
     29 #include "nvim/memory.h"
     30 #include "nvim/memory_defs.h"
     31 #include "nvim/message.h"
     32 #include "nvim/option.h"
     33 #include "nvim/plines.h"
     34 #include "nvim/strings.h"
     35 #include "nvim/types_defs.h"
     36 #include "nvim/vim_defs.h"
     37 
     38 #include "strings.c.generated.h"
     39 
     40 static const char e_cannot_mix_positional_and_non_positional_str[]
     41  = N_("E1500: Cannot mix positional and non-positional arguments: %s");
     42 static const char e_fmt_arg_nr_unused_str[]
     43  = N_("E1501: format argument %d unused in $-style format: %s");
     44 static const char e_positional_num_field_spec_reused_str_str[]
     45  = N_("E1502: Positional argument %d used as field width reused as different type: %s/%s");
     46 static const char e_positional_nr_out_of_bounds_str[]
     47  = N_("E1503: Positional argument %d out of bounds: %s");
     48 static const char e_positional_arg_num_type_inconsistent_str_str[]
     49  = N_("E1504: Positional argument %d type used inconsistently: %s/%s");
     50 static const char e_invalid_format_specifier_str[]
     51  = N_("E1505: Invalid format specifier: %s");
     52 static const char e_aptypes_is_null_nr_str[]
     53  = "E1507: Internal error: ap_types or ap_types[idx] is NULL: %d: %s";
     54 
     55 static const char typename_unknown[] = N_("unknown");
     56 static const char typename_int[] = N_("int");
     57 static const char typename_longint[] = N_("long int");
     58 static const char typename_longlongint[] = N_("long long int");
     59 static const char typename_signedsizet[] = N_("signed size_t");
     60 static const char typename_unsignedint[] = N_("unsigned int");
     61 static const char typename_unsignedlongint[] = N_("unsigned long int");
     62 static const char typename_unsignedlonglongint[] = N_("unsigned long long int");
     63 static const char typename_sizet[] = N_("size_t");
     64 static const char typename_pointer[] = N_("pointer");
     65 static const char typename_percent[] = N_("percent");
     66 static const char typename_char[] = N_("char");
     67 static const char typename_string[] = N_("string");
     68 static const char typename_float[] = N_("float");
     69 
     70 /// Copy up to `len` bytes of `string` into newly allocated memory and
     71 /// terminate with a NUL. The allocated memory always has size `len + 1`, even
     72 /// when `string` is shorter.
     73 char *xstrnsave(const char *string, size_t len)
     74  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
     75 {
     76  return strncpy(xmallocz(len), string, len);  // NOLINT(runtime/printf)
     77 }
     78 
     79 // Same as vim_strsave(), but any characters found in esc_chars are preceded
     80 // by a backslash.
     81 char *vim_strsave_escaped(const char *string, const char *esc_chars)
     82  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
     83 {
     84  return vim_strsave_escaped_ext(string, esc_chars, '\\', false);
     85 }
     86 
     87 // Same as vim_strsave_escaped(), but when "bsl" is true also escape
     88 // characters where rem_backslash() would remove the backslash.
     89 // Escape the characters with "cc".
     90 char *vim_strsave_escaped_ext(const char *string, const char *esc_chars, char cc, bool bsl)
     91  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
     92 {
     93  // First count the number of backslashes required.
     94  // Then allocate the memory and insert them.
     95  size_t length = 1;                    // count the trailing NUL
     96  for (const char *p = string; *p; p++) {
     97    const size_t l = (size_t)(utfc_ptr2len(p));
     98    if (l > 1) {
     99      length += l;                      // count a multibyte char
    100      p += l - 1;
    101      continue;
    102    }
    103    if (vim_strchr(esc_chars, (uint8_t)(*p)) != NULL || (bsl && rem_backslash(p))) {
    104      length++;                         // count a backslash
    105    }
    106    length++;                           // count an ordinary char
    107  }
    108 
    109  char *escaped_string = xmalloc(length);
    110  char *p2 = escaped_string;
    111  for (const char *p = string; *p; p++) {
    112    const size_t l = (size_t)(utfc_ptr2len(p));
    113    if (l > 1) {
    114      memcpy(p2, p, l);
    115      p2 += l;
    116      p += l - 1;                     // skip multibyte char
    117      continue;
    118    }
    119    if (vim_strchr(esc_chars, (uint8_t)(*p)) != NULL || (bsl && rem_backslash(p))) {
    120      *p2++ = cc;
    121    }
    122    *p2++ = *p;
    123  }
    124  *p2 = NUL;
    125 
    126  return escaped_string;
    127 }
    128 
    129 /// Save a copy of an unquoted string
    130 ///
    131 /// Turns string like `a\bc"def\"ghi\\\n"jkl` into `a\bcdef"ghi\\njkl`, for use
    132 /// in shell_build_argv: the only purpose of backslash is making next character
    133 /// be treated literally inside the double quotes, if this character is
    134 /// backslash or quote.
    135 ///
    136 /// @param[in]  string  String to copy.
    137 /// @param[in]  length  Length of the string to copy.
    138 ///
    139 /// @return [allocated] Copy of the string.
    140 char *vim_strnsave_unquoted(const char *const string, const size_t length)
    141  FUNC_ATTR_MALLOC FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    142  FUNC_ATTR_NONNULL_RET
    143 {
    144 #define ESCAPE_COND(p, inquote, string_end) \
    145  (*(p) == '\\' && (inquote) && (p) + 1 < (string_end) && ((p)[1] == '\\' || (p)[1] == '"'))
    146  size_t ret_length = 0;
    147  bool inquote = false;
    148  const char *const string_end = string + length;
    149  for (const char *p = string; p < string_end; p++) {
    150    if (*p == '"') {
    151      inquote = !inquote;
    152    } else if (ESCAPE_COND(p, inquote, string_end)) {
    153      ret_length++;
    154      p++;
    155    } else {
    156      ret_length++;
    157    }
    158  }
    159 
    160  char *const ret = xmallocz(ret_length);
    161  char *rp = ret;
    162  inquote = false;
    163  for (const char *p = string; p < string_end; p++) {
    164    if (*p == '"') {
    165      inquote = !inquote;
    166    } else if (ESCAPE_COND(p, inquote, string_end)) {
    167      *rp++ = *(++p);
    168    } else {
    169      *rp++ = *p;
    170    }
    171  }
    172 #undef ESCAPE_COND
    173 
    174  return ret;
    175 }
    176 
    177 /// Escape "string" for use as a shell argument with system().
    178 /// This uses single quotes, except when we know we need to use double quotes
    179 /// (MS-Windows without 'shellslash' set).
    180 /// Escape a newline, depending on the 'shell' option.
    181 /// When "do_special" is true also replace "!", "%", "#" and things starting
    182 /// with "<" like "<cfile>".
    183 /// When "do_newline" is false do not escape newline unless it is csh shell.
    184 ///
    185 /// @return  the result in allocated memory.
    186 char *vim_strsave_shellescape(const char *string, bool do_special, bool do_newline)
    187  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
    188 {
    189  size_t l;
    190 
    191  // Only csh and similar shells expand '!' within single quotes.  For sh and
    192  // the like we must not put a backslash before it, it will be taken
    193  // literally.  If do_special is set the '!' will be escaped twice.
    194  // Csh also needs to have "\n" escaped twice when do_special is set.
    195  int csh_like = csh_like_shell();
    196 
    197  // Fish shell uses '\' as an escape character within single quotes, so '\'
    198  // itself must be escaped to get a literal '\'.
    199  bool fish_like = fish_like_shell();
    200 
    201  // First count the number of extra bytes required.
    202  size_t length = strlen(string) + 3;       // two quotes and a trailing NUL
    203  for (const char *p = string; *p != NUL; MB_PTR_ADV(p)) {
    204 #ifdef MSWIN
    205    if (!p_ssl) {
    206      if (*p == '"') {
    207        length++;                       // " -> ""
    208      }
    209    } else
    210 #endif
    211    if (*p == '\'') {
    212      length += 3;                      // ' => '\''
    213    }
    214    if ((*p == '\n' && (csh_like || do_newline))
    215        || (*p == '!' && (csh_like || do_special))) {
    216      length++;                         // insert backslash
    217      if (csh_like && do_special) {
    218        length++;                       // insert backslash
    219      }
    220    }
    221    if (do_special && find_cmdline_var(p, &l) >= 0) {
    222      length++;                         // insert backslash
    223      p += l - 1;
    224    }
    225    if (*p == '\\' && fish_like) {
    226      length++;  // insert backslash
    227    }
    228  }
    229 
    230  // Allocate memory for the result and fill it.
    231  char *escaped_string = xmalloc(length);
    232  char *d = escaped_string;
    233 
    234  // add opening quote
    235 #ifdef MSWIN
    236  if (!p_ssl) {
    237    *d++ = '"';
    238  } else
    239 #endif
    240  *d++ = '\'';
    241 
    242  for (const char *p = string; *p != NUL;) {
    243 #ifdef MSWIN
    244    if (!p_ssl) {
    245      if (*p == '"') {
    246        *d++ = '"';
    247        *d++ = '"';
    248        p++;
    249        continue;
    250      }
    251    } else
    252 #endif
    253    if (*p == '\'') {
    254      *d++ = '\'';
    255      *d++ = '\\';
    256      *d++ = '\'';
    257      *d++ = '\'';
    258      p++;
    259      continue;
    260    }
    261    if ((*p == '\n' && (csh_like || do_newline))
    262        || (*p == '!' && (csh_like || do_special))) {
    263      *d++ = '\\';
    264      if (csh_like && do_special) {
    265        *d++ = '\\';
    266      }
    267      *d++ = *p++;
    268      continue;
    269    }
    270    if (do_special && find_cmdline_var(p, &l) >= 0) {
    271      *d++ = '\\';                    // insert backslash
    272      memcpy(d, p, l);                // copy the var
    273      d += l;
    274      p += l;
    275      continue;
    276    }
    277    if (*p == '\\' && fish_like) {
    278      *d++ = '\\';
    279      *d++ = *p++;
    280      continue;
    281    }
    282 
    283    mb_copy_char(&p, &d);
    284  }
    285 
    286  // add terminating quote and finish with a NUL
    287 #ifdef MSWIN
    288  if (!p_ssl) {
    289    *d++ = '"';
    290  } else
    291 #endif
    292  *d++ = '\'';
    293  *d = NUL;
    294 
    295  return escaped_string;
    296 }
    297 
    298 // Like vim_strsave(), but make all characters uppercase.
    299 // This uses ASCII lower-to-upper case translation, language independent.
    300 char *vim_strsave_up(const char *string)
    301  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
    302 {
    303  char *p1 = xmalloc(strlen(string) + 1);
    304  vim_strcpy_up(p1, string);
    305  return p1;
    306 }
    307 
    308 /// Like xstrnsave(), but make all characters uppercase.
    309 /// This uses ASCII lower-to-upper case translation, language independent.
    310 char *vim_strnsave_up(const char *string, size_t len)
    311  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
    312 {
    313  char *p1 = xmalloc(len + 1);
    314  vim_strncpy_up(p1, string, len);
    315  return p1;
    316 }
    317 
    318 // ASCII lower-to-upper case translation, language independent.
    319 void vim_strup(char *p)
    320  FUNC_ATTR_NONNULL_ALL
    321 {
    322  uint8_t c;
    323  while ((c = (uint8_t)(*p)) != NUL) {
    324    *p++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20);
    325  }
    326 }
    327 
    328 // strcpy plus vim_strup.
    329 void vim_strcpy_up(char *restrict dst, const char *restrict src)
    330  FUNC_ATTR_NONNULL_ALL
    331 {
    332  uint8_t c;
    333  while ((c = (uint8_t)(*src++)) != NUL) {
    334    *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20);
    335  }
    336  *dst = NUL;
    337 }
    338 
    339 // strncpy (NUL-terminated) plus vim_strup.
    340 void vim_strncpy_up(char *restrict dst, const char *restrict src, size_t n)
    341  FUNC_ATTR_NONNULL_ALL
    342 {
    343  uint8_t c;
    344  while (n-- && (c = (uint8_t)(*src++)) != NUL) {
    345    *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20);
    346  }
    347  *dst = NUL;
    348 }
    349 
    350 // memcpy (does not NUL-terminate) plus vim_strup.
    351 void vim_memcpy_up(char *restrict dst, const char *restrict src, size_t n)
    352  FUNC_ATTR_NONNULL_ALL
    353 {
    354  uint8_t c;
    355  while (n--) {
    356    c = (uint8_t)(*src++);
    357    *dst++ = (char)(uint8_t)(c < 'a' || c > 'z' ? c : c - 0x20);
    358  }
    359 }
    360 
    361 /// Make given string all upper-case or all lower-case
    362 ///
    363 /// Handles multi-byte characters as good as possible.
    364 ///
    365 /// @param[in]  orig  Input string.
    366 /// @param[in]  upper If true make uppercase, otherwise lowercase
    367 ///
    368 /// @return [allocated] upper-cased string.
    369 char *strcase_save(const char *const orig, bool upper)
    370  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
    371 {
    372  // Calculate the initial length and allocate memory for the result
    373  size_t orig_len = strlen(orig);
    374  // +1 for the null terminator
    375  char *res = xmalloc(orig_len + 1);
    376  // Index in the result string
    377  size_t res_index = 0;
    378  // Current position in the original string
    379  const char *p = orig;
    380 
    381  while (*p != NUL) {
    382    CharInfo char_info = utf_ptr2CharInfo(p);
    383    int c = char_info.value < 0 ? (uint8_t)(*p) : char_info.value;
    384    int newc = upper ? mb_toupper(c) : mb_tolower(c);
    385    // Cast to size_t to avoid mixing types in arithmetic
    386    size_t newl = (size_t)utf_char2len(newc);
    387 
    388    // Check if there's enough space in the allocated memory
    389    if (res_index + newl > orig_len) {
    390      // Need more space: allocate extra space for the new character and the null terminator
    391      size_t new_size = res_index + newl + 1;
    392      res = xrealloc(res, new_size);
    393      // Adjust the original length to the new size, minus the null terminator
    394      orig_len = new_size - 1;
    395    }
    396 
    397    // Write the possibly new character into the result string
    398    utf_char2bytes(newc, res + res_index);
    399    // Move the index in the result string
    400    res_index += newl;
    401    // Move to the next character in the original string
    402    p += char_info.len;
    403  }
    404 
    405  // Null-terminate the result string
    406  res[res_index] = NUL;
    407  return res;
    408 }
    409 
    410 // delete spaces at the end of a string
    411 void del_trailing_spaces(char *ptr)
    412  FUNC_ATTR_NONNULL_ALL
    413 {
    414  char *q = ptr + strlen(ptr);
    415  while (--q > ptr && ascii_iswhite(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V) {
    416    *q = NUL;
    417  }
    418 }
    419 
    420 #if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP))
    421 // Compare two strings, ignoring case, using current locale.
    422 // Doesn't work for multi-byte characters.
    423 // return 0 for match, < 0 for smaller, > 0 for bigger
    424 int vim_stricmp(const char *s1, const char *s2)
    425  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
    426 {
    427  int i;
    428 
    429  while (true) {
    430    i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2));
    431    if (i != 0) {
    432      return i;                             // this character different
    433    }
    434    if (*s1 == NUL) {
    435      break;                                // strings match until NUL
    436    }
    437    s1++;
    438    s2++;
    439  }
    440  return 0;                                 // strings match
    441 }
    442 #endif
    443 
    444 #if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP))
    445 // Compare two strings, for length "len", ignoring case, using current locale.
    446 // Doesn't work for multi-byte characters.
    447 // return 0 for match, < 0 for smaller, > 0 for bigger
    448 int vim_strnicmp(const char *s1, const char *s2, size_t len)
    449  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
    450 {
    451  int i;
    452 
    453  while (len > 0) {
    454    i = (int)TOLOWER_LOC((uint8_t)(*s1)) - (int)TOLOWER_LOC((uint8_t)(*s2));
    455    if (i != 0) {
    456      return i;                             // this character different
    457    }
    458    if (*s1 == NUL) {
    459      break;                                // strings match until NUL
    460    }
    461    s1++;
    462    s2++;
    463    len--;
    464  }
    465  return 0;                                 // strings match
    466 }
    467 #endif
    468 
    469 /// Case-insensitive `strequal`.
    470 bool striequal(const char *a, const char *b)
    471  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    472 {
    473  return (a == NULL && b == NULL) || (a && b && STRICMP(a, b) == 0);
    474 }
    475 
    476 /// Compare two ASCII strings, for length "len", ignoring case, ignoring locale.
    477 ///
    478 /// @return 0 for match, < 0 for smaller, > 0 for bigger
    479 int vim_strnicmp_asc(const char *s1, const char *s2, size_t len)
    480  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    481 {
    482  int i = 0;
    483  while (len > 0) {
    484    i = TOLOWER_ASC(*s1) - TOLOWER_ASC(*s2);
    485    if (i != 0) {
    486      break;                       // this character is different
    487    }
    488    if (*s1 == NUL) {
    489      break;                       // strings match until NUL
    490    }
    491    s1++;
    492    s2++;
    493    len--;
    494  }
    495  return i;
    496 }
    497 
    498 /// strchr() version which handles multibyte strings
    499 ///
    500 /// @param[in]  string  String to search in.
    501 /// @param[in]  c  Character to search for.
    502 ///
    503 /// @return Pointer to the first byte of the found character in string or NULL
    504 ///         if it was not found or character is invalid. NUL character is never
    505 ///         found, use `strlen()` instead.
    506 char *vim_strchr(const char *const string, const int c)
    507  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    508 {
    509  if (c <= 0) {
    510    return NULL;
    511  } else if (c < 0x80) {
    512    return strchr(string, c);
    513  } else {
    514    char u8char[MB_MAXBYTES + 1];
    515    const int len = utf_char2bytes(c, u8char);
    516    u8char[len] = NUL;
    517    return strstr(string, u8char);
    518  }
    519 }
    520 
    521 // Sort an array of strings.
    522 
    523 static int sort_compare(const void *s1, const void *s2)
    524  FUNC_ATTR_NONNULL_ALL
    525 {
    526  return strcmp(*(char **)s1, *(char **)s2);
    527 }
    528 
    529 void sort_strings(char **files, int count)
    530 {
    531  qsort((void *)files, (size_t)count, sizeof(char *), sort_compare);
    532 }
    533 
    534 // Return true if string "s" contains a non-ASCII character (128 or higher).
    535 // When "s" is NULL false is returned.
    536 bool has_non_ascii(const char *s)
    537  FUNC_ATTR_PURE
    538 {
    539  if (s != NULL) {
    540    for (const char *p = s; *p != NUL; p++) {
    541      if ((uint8_t)(*p) >= 128) {
    542        return true;
    543      }
    544    }
    545  }
    546  return false;
    547 }
    548 
    549 /// Return true if string "s" contains a non-ASCII character (128 or higher).
    550 /// When "s" is NULL false is returned.
    551 bool has_non_ascii_len(const char *const s, const size_t len)
    552  FUNC_ATTR_PURE
    553 {
    554  if (s != NULL) {
    555    for (size_t i = 0; i < len; i++) {
    556      if ((uint8_t)s[i] >= 128) {
    557        return true;
    558      }
    559    }
    560  }
    561  return false;
    562 }
    563 
    564 /// Concatenate two strings and return the result in allocated memory.
    565 char *concat_str(const char *restrict str1, const char *restrict str2)
    566  FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
    567 {
    568  size_t l = strlen(str1);
    569  char *dest = xmalloc(l + strlen(str2) + 1);
    570  STRCPY(dest, str1);
    571  STRCPY(dest + l, str2);
    572  return dest;
    573 }
    574 
    575 static const char *const e_printf =
    576  N_("E766: Insufficient arguments for printf()");
    577 
    578 /// Get number argument from idxp entry in tvs
    579 ///
    580 /// Will give an error message for Vimscript entry with invalid type or for insufficient entries.
    581 ///
    582 /// @param[in]  tvs  List of Vimscript values. List is terminated by VAR_UNKNOWN value.
    583 /// @param[in,out]  idxp  Index in a list. Will be incremented. Indexing starts at 1.
    584 ///
    585 /// @return Number value or 0 in case of error.
    586 static varnumber_T tv_nr(typval_T *tvs, int *idxp)
    587  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
    588 {
    589  int idx = *idxp - 1;
    590  varnumber_T n = 0;
    591 
    592  if (tvs[idx].v_type == VAR_UNKNOWN) {
    593    emsg(_(e_printf));
    594  } else {
    595    (*idxp)++;
    596    bool err = false;
    597    n = tv_get_number_chk(&tvs[idx], &err);
    598    if (err) {
    599      n = 0;
    600    }
    601  }
    602  return n;
    603 }
    604 
    605 /// Get string argument from idxp entry in tvs
    606 ///
    607 /// Will give an error message for Vimscript entry with invalid type or for
    608 /// insufficient entries.
    609 ///
    610 /// @param[in]  tvs  List of Vimscript values. List is terminated by VAR_UNKNOWN
    611 ///                  value.
    612 /// @param[in,out]  idxp  Index in a list. Will be incremented.
    613 /// @param[out]  tofree  If the idxp entry in tvs is not a String or a Number,
    614 ///                      it will be converted to String in the same format
    615 ///                      as ":echo" and stored in "*tofree". The caller must
    616 ///                      free "*tofree".
    617 ///
    618 /// @return String value or NULL in case of error.
    619 static const char *tv_str(typval_T *tvs, int *idxp, char **const tofree)
    620  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
    621 {
    622  int idx = *idxp - 1;
    623  const char *s = NULL;
    624 
    625  if (tvs[idx].v_type == VAR_UNKNOWN) {
    626    emsg(_(e_printf));
    627  } else {
    628    (*idxp)++;
    629    if (tvs[idx].v_type == VAR_STRING || tvs[idx].v_type == VAR_NUMBER) {
    630      s = tv_get_string_chk(&tvs[idx]);
    631      *tofree = NULL;
    632    } else {
    633      s = *tofree = encode_tv2echo(&tvs[idx], NULL);
    634    }
    635  }
    636  return s;
    637 }
    638 
    639 /// Get pointer argument from the next entry in tvs
    640 ///
    641 /// Will give an error message for Vimscript entry with invalid type or for
    642 /// insufficient entries.
    643 ///
    644 /// @param[in]  tvs  List of typval_T values.
    645 /// @param[in,out]  idxp  Pointer to the index of the current value.
    646 ///
    647 /// @return Pointer stored in typval_T or NULL.
    648 static const void *tv_ptr(const typval_T *const tvs, int *const idxp)
    649  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
    650 {
    651 #define OFF(attr) offsetof(union typval_vval_union, attr)
    652  STATIC_ASSERT(OFF(v_string) == OFF(v_list)
    653                && OFF(v_string) == OFF(v_dict)
    654                && OFF(v_string) == OFF(v_blob)
    655                && OFF(v_string) == OFF(v_partial)
    656                && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_list)
    657                && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_dict)
    658                && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_blob)
    659                && sizeof(tvs[0].vval.v_string) == sizeof(tvs[0].vval.v_partial),
    660                "Strings, Dictionaries, Lists, Blobs and Partials are expected to be pointers, "
    661                "so that all of them can be accessed via v_string");
    662 #undef OFF
    663  const int idx = *idxp - 1;
    664  if (tvs[idx].v_type == VAR_UNKNOWN) {
    665    emsg(_(e_printf));
    666    return NULL;
    667  }
    668  (*idxp)++;
    669  return tvs[idx].vval.v_string;
    670 }
    671 
    672 /// Get float argument from idxp entry in tvs
    673 ///
    674 /// Will give an error message for Vimscript entry with invalid type or for
    675 /// insufficient entries.
    676 ///
    677 /// @param[in]  tvs  List of Vimscript values. List is terminated by VAR_UNKNOWN value.
    678 /// @param[in,out]  idxp  Index in a list. Will be incremented.
    679 ///
    680 /// @return Floating-point value or zero in case of error.
    681 static float_T tv_float(typval_T *const tvs, int *const idxp)
    682  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
    683 {
    684  int idx = *idxp - 1;
    685  float_T f = 0;
    686 
    687  if (tvs[idx].v_type == VAR_UNKNOWN) {
    688    emsg(_(e_printf));
    689  } else {
    690    (*idxp)++;
    691    if (tvs[idx].v_type == VAR_FLOAT) {
    692      f = tvs[idx].vval.v_float;
    693    } else if (tvs[idx].v_type == VAR_NUMBER) {
    694      f = (float_T)tvs[idx].vval.v_number;
    695    } else {
    696      emsg(_("E807: Expected Float argument for printf()"));
    697    }
    698  }
    699  return f;
    700 }
    701 
    702 // This code was included to provide a portable vsnprintf() and snprintf().
    703 // Some systems may provide their own, but we always use this one for
    704 // consistency.
    705 //
    706 // This code is based on snprintf.c - a portable implementation of snprintf
    707 // by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
    708 // Included with permission.  It was heavily modified to fit in Vim.
    709 // The original code, including useful comments, can be found here:
    710 //
    711 //     http://www.ijs.si/software/snprintf/
    712 //
    713 // This snprintf() only supports the following conversion specifiers:
    714 // s, c, b, B, d, u, o, x, X, p  (and synonyms: i, D, U, O - see below)
    715 // with flags: '-', '+', ' ', '0' and '#'.
    716 // An asterisk is supported for field width as well as precision.
    717 //
    718 // Limited support for floating point was added: 'f', 'e', 'E', 'g', 'G'.
    719 //
    720 // Length modifiers 'h' (short int), 'l' (long int) and "ll" (long long int) are
    721 // supported.
    722 //
    723 // The locale is not used, the string is used as a byte string.  This is only
    724 // relevant for double-byte encodings where the second byte may be '%'.
    725 //
    726 // It is permitted for "str_m" to be zero, and it is permitted to specify NULL
    727 // pointer for resulting string argument if "str_m" is zero (as per ISO C99).
    728 //
    729 // The return value is the number of characters which would be generated
    730 // for the given input, excluding the trailing NUL. If this value
    731 // is greater or equal to "str_m", not all characters from the result
    732 // have been stored in str, output bytes beyond the ("str_m"-1) -th character
    733 // are discarded. If "str_m" is greater than zero it is guaranteed
    734 // the resulting string will be NUL-terminated.
    735 
    736 // vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
    737 // "typval_T".  When the latter is not used it must be NULL.
    738 
    739 /// Append a formatted value to the string
    740 ///
    741 /// @see vim_vsnprintf_typval().
    742 int vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
    743  FUNC_ATTR_PRINTF(3, 4)
    744 {
    745  const size_t len = strlen(str);
    746  size_t space;
    747 
    748  if (str_m <= len) {
    749    space = 0;
    750  } else {
    751    space = str_m - len;
    752  }
    753  va_list ap;
    754  va_start(ap, fmt);
    755  const int str_l = vim_vsnprintf(str + len, space, fmt, ap);
    756  va_end(ap);
    757  return str_l;
    758 }
    759 
    760 /// Write formatted value to the string
    761 ///
    762 /// @param[out]  str  String to write to.
    763 /// @param[in]  str_m  String length.
    764 /// @param[in]  fmt  String format.
    765 ///
    766 /// @return Number of bytes excluding NUL byte that would be written to the
    767 ///         string if str_m was greater or equal to the return value.
    768 int vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
    769  FUNC_ATTR_PRINTF(3, 4)
    770 {
    771  va_list ap;
    772  va_start(ap, fmt);
    773  const int str_l = vim_vsnprintf(str, str_m, fmt, ap);
    774  va_end(ap);
    775  return str_l;
    776 }
    777 
    778 // Return the representation of infinity for printf() function:
    779 // "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
    780 static const char *infinity_str(bool positive, char fmt_spec, int force_sign,
    781                                int space_for_positive)
    782 {
    783  static const char *table[] = {
    784    "-inf", "inf", "+inf", " inf",
    785    "-INF", "INF", "+INF", " INF"
    786  };
    787  int idx = positive * (1 + force_sign + force_sign * space_for_positive);
    788  if (ASCII_ISUPPER(fmt_spec)) {
    789    idx += 4;
    790  }
    791  return table[idx];
    792 }
    793 
    794 /// Like vim_snprintf() except the return value can be safely used to increment a
    795 /// buffer length.
    796 /// Normal `snprintf()` (and `vim_snprintf()`) returns the number of bytes that
    797 /// would have been copied if the destination buffer was large enough.
    798 /// This means that you cannot rely on it's return value for the destination
    799 /// length because the destination may be shorter than the source. This function
    800 /// guarantees the returned length will never be greater than the destination length.
    801 size_t vim_snprintf_safelen(char *str, size_t str_m, const char *fmt, ...)
    802 {
    803  va_list ap;
    804  int str_l;
    805 
    806  va_start(ap, fmt);
    807  str_l = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
    808  va_end(ap);
    809 
    810  if (str_l < 0) {
    811    *str = NUL;
    812    return 0;
    813  }
    814  return ((size_t)str_l >= str_m) ? str_m - 1 : (size_t)str_l;
    815 }
    816 
    817 int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap)
    818 {
    819  return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
    820 }
    821 
    /// Argument categories reported by format_typeof() for a conversion
    /// specification.
    enum {
      TYPE_UNKNOWN = -1,             // unrecognised conversion character
      TYPE_INT = 0,                  // 'd' without or with 'h' modifier, and '*'
      TYPE_LONGINT = 1,              // 'd' with 'l' modifier
      TYPE_LONGLONGINT = 2,          // 'd' with 'll' modifier
      TYPE_SIGNEDSIZET = 3,          // 'd' with 'z' modifier
      TYPE_UNSIGNEDINT = 4,          // 'u', 'o', 'x', 'X' without or with 'h' modifier
      TYPE_UNSIGNEDLONGINT = 5,      // 'u', 'o', 'x', 'X' with 'l' modifier
      TYPE_UNSIGNEDLONGLONGINT = 6,  // 'u', 'o', 'x', 'X' with 'll' modifier; 'b', 'B'
      TYPE_SIZET = 7,                // 'u', 'o', 'x', 'X' with 'z' modifier
      TYPE_POINTER = 8,              // 'p'
      TYPE_PERCENT = 9,              // '%'
      TYPE_CHAR = 10,                // 'c'
      TYPE_STRING = 11,              // 's', 'S'
      TYPE_FLOAT = 12,               // 'f', 'F', 'e', 'E', 'g', 'G'
    };
    838 
    839 /// Types that can be used in a format string
    840 static int format_typeof(const char *type)
    841  FUNC_ATTR_NONNULL_ALL
    842 {
    843  // allowed values: \0, h, l, L
    844  char length_modifier = NUL;
    845 
    846  // current conversion specifier character
    847  char fmt_spec = NUL;
    848 
    849  // parse 'h', 'l', 'll' and 'z' length modifiers
    850  if (*type == 'h' || *type == 'l' || *type == 'z') {
    851    length_modifier = *type;
    852    type++;
    853    if (length_modifier == 'l' && *type == 'l') {
    854      // double l = long long
    855      length_modifier = 'L';
    856      type++;
    857    }
    858  }
    859  fmt_spec = *type;
    860 
    861  // common synonyms:
    862  switch (fmt_spec) {
    863  case 'i':
    864    fmt_spec = 'd'; break;
    865  case '*':
    866    fmt_spec = 'd'; length_modifier = 'h'; break;
    867  case 'D':
    868    fmt_spec = 'd'; length_modifier = 'l'; break;
    869  case 'U':
    870    fmt_spec = 'u'; length_modifier = 'l'; break;
    871  case 'O':
    872    fmt_spec = 'o'; length_modifier = 'l'; break;
    873  default:
    874    break;
    875  }
    876 
    877  // get parameter value, do initial processing
    878  switch (fmt_spec) {
    879  // '%' and 'c' behave similar to 's' regarding flags and field
    880  // widths
    881  case '%':
    882    return TYPE_PERCENT;
    883 
    884  case 'c':
    885    return TYPE_CHAR;
    886 
    887  case 's':
    888  case 'S':
    889    return TYPE_STRING;
    890 
    891  case 'd':
    892  case 'u':
    893  case 'b':
    894  case 'B':
    895  case 'o':
    896  case 'x':
    897  case 'X':
    898  case 'p':
    899    // NOTE: the u, b, o, x, X and p conversion specifiers
    900    // imply the value is unsigned;  d implies a signed
    901    // value
    902 
    903    // 0 if numeric argument is zero (or if pointer is
    904    // NULL for 'p'), +1 if greater than zero (or nonzero
    905    // for unsigned arguments), -1 if negative (unsigned
    906    // argument is never negative)
    907 
    908    if (fmt_spec == 'p') {
    909      return TYPE_POINTER;
    910    } else if (fmt_spec == 'b' || fmt_spec == 'B') {
    911      return TYPE_UNSIGNEDLONGLONGINT;
    912    } else if (fmt_spec == 'd') {
    913      // signed
    914      switch (length_modifier) {
    915      case NUL:
    916      case 'h':
    917        // char and short arguments are passed as int.
    918        return TYPE_INT;
    919      case 'l':
    920        return TYPE_LONGINT;
    921      case 'L':
    922        return TYPE_LONGLONGINT;
    923      case 'z':
    924        return TYPE_SIGNEDSIZET;
    925      }
    926    } else {
    927      // unsigned
    928      switch (length_modifier) {
    929      case NUL:
    930      case 'h':
    931        return TYPE_UNSIGNEDINT;
    932      case 'l':
    933        return TYPE_UNSIGNEDLONGINT;
    934      case 'L':
    935        return TYPE_UNSIGNEDLONGLONGINT;
    936      case 'z':
    937        return TYPE_SIZET;
    938      }
    939    }
    940    break;
    941 
    942  case 'f':
    943  case 'F':
    944  case 'e':
    945  case 'E':
    946  case 'g':
    947  case 'G':
    948    return TYPE_FLOAT;
    949  }
    950 
    951  return TYPE_UNKNOWN;
    952 }
    953 
    954 static char *format_typename(const char *type)
    955  FUNC_ATTR_NONNULL_ALL
    956 {
    957  switch (format_typeof(type)) {
    958  case TYPE_INT:
    959    return _(typename_int);
    960  case TYPE_LONGINT:
    961    return _(typename_longint);
    962  case TYPE_LONGLONGINT:
    963    return _(typename_longlongint);
    964  case TYPE_UNSIGNEDINT:
    965    return _(typename_unsignedint);
    966  case TYPE_SIGNEDSIZET:
    967    return _(typename_signedsizet);
    968  case TYPE_UNSIGNEDLONGINT:
    969    return _(typename_unsignedlongint);
    970  case TYPE_UNSIGNEDLONGLONGINT:
    971    return _(typename_unsignedlonglongint);
    972  case TYPE_SIZET:
    973    return _(typename_sizet);
    974  case TYPE_POINTER:
    975    return _(typename_pointer);
    976  case TYPE_PERCENT:
    977    return _(typename_percent);
    978  case TYPE_CHAR:
    979    return _(typename_char);
    980  case TYPE_STRING:
    981    return _(typename_string);
    982  case TYPE_FLOAT:
    983    return _(typename_float);
    984  }
    985 
    986  return _(typename_unknown);
    987 }
    988 
    989 static int adjust_types(const char ***ap_types, int arg, int *num_posarg, const char *type)
    990  FUNC_ATTR_NONNULL_ALL
    991 {
    992  if (arg <= 0) {
    993    semsg(_(e_invalid_format_specifier_str), type);
    994    return FAIL;
    995  }
    996 
    997  if (*ap_types == NULL || *num_posarg < arg) {
    998    const char **new_types = *ap_types == NULL
    999                             ? xcalloc((size_t)arg, sizeof(const char *))
   1000                             : xrealloc(*ap_types, (size_t)arg * sizeof(const char *));
   1001 
   1002    for (int idx = *num_posarg; idx < arg; idx++) {
   1003      new_types[idx] = NULL;
   1004    }
   1005 
   1006    *ap_types = new_types;
   1007    *num_posarg = arg;
   1008  }
   1009 
   1010  if ((*ap_types)[arg - 1] != NULL) {
   1011    if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*') {
   1012      const char *pt = type;
   1013      if (pt[0] == '*') {
   1014        pt = (*ap_types)[arg - 1];
   1015      }
   1016 
   1017      if (pt[0] != '*') {
   1018        switch (pt[0]) {
   1019        case 'd':
   1020        case 'i':
   1021          break;
   1022        default:
   1023          semsg(_(e_positional_num_field_spec_reused_str_str), arg,
   1024                format_typename((*ap_types)[arg - 1]), format_typename(type));
   1025          return FAIL;
   1026        }
   1027      }
   1028    } else {
   1029      if (format_typeof(type) != format_typeof((*ap_types)[arg - 1])) {
   1030        semsg(_(e_positional_arg_num_type_inconsistent_str_str), arg,
   1031              format_typename(type), format_typename((*ap_types)[arg - 1]));
   1032        return FAIL;
   1033      }
   1034    }
   1035  }
   1036 
   1037  (*ap_types)[arg - 1] = type;
   1038 
   1039  return OK;
   1040 }
   1041 
   1042 static void format_overflow_error(const char *pstart)
   1043 {
   1044  const char *p = pstart;
   1045 
   1046  while (ascii_isdigit((int)(*p))) {
   1047    p++;
   1048  }
   1049 
   1050  size_t arglen = (size_t)(p - pstart);
   1051  char *argcopy = xstrnsave(pstart, arglen);
   1052  semsg(_(e_val_too_large), argcopy);
   1053  xfree(argcopy);
   1054 }
   1055 
   1056 enum { MAX_ALLOWED_STRING_WIDTH = 1048576, };  // 1MiB
   1057 
   1058 static int get_unsigned_int(const char *pstart, const char **p, unsigned *uj, bool overflow_err)
   1059 {
   1060  *uj = (unsigned)(**p - '0');
   1061  (*p)++;
   1062 
   1063  while (ascii_isdigit((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH) {
   1064    *uj = 10 * *uj + (unsigned)(**p - '0');
   1065    (*p)++;
   1066  }
   1067 
   1068  if (*uj > MAX_ALLOWED_STRING_WIDTH) {
   1069    if (overflow_err) {
   1070      format_overflow_error(pstart);
   1071      return FAIL;
   1072    } else {
   1073      *uj = MAX_ALLOWED_STRING_WIDTH;
   1074    }
   1075  }
   1076 
   1077  return OK;
   1078 }
   1079 
   1080 static int parse_fmt_types(const char ***ap_types, int *num_posarg, const char *fmt, typval_T *tvs)
   1081  FUNC_ATTR_NONNULL_ARG(1, 2)
   1082 {
   1083  const char *p = fmt;
   1084  const char *arg = NULL;
   1085 
   1086  int any_pos = 0;
   1087  int any_arg = 0;
   1088 
   1089 #define CHECK_POS_ARG \
   1090  do { \
   1091    if (any_pos && any_arg) { \
   1092      semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt); \
   1093      goto error; \
   1094    } \
   1095  } while (0);
   1096 
   1097  if (p == NULL) {
   1098    return OK;
   1099  }
   1100 
   1101  while (*p != NUL) {
   1102    if (*p != '%') {
   1103      size_t n = (size_t)(xstrchrnul(p + 1, '%') - p);
   1104      p += n;
   1105    } else {
   1106      // allowed values: \0, h, l, L
   1107      char length_modifier = NUL;
   1108 
   1109      // variable for positional arg
   1110      int pos_arg = -1;
   1111      const char *pstart = p + 1;
   1112 
   1113      p++;  // skip '%'
   1114 
   1115      // First check to see if we find a positional
   1116      // argument specifier
   1117      const char *ptype = p;
   1118 
   1119      while (ascii_isdigit(*ptype)) {
   1120        ptype++;
   1121      }
   1122 
   1123      if (*ptype == '$') {
   1124        if (*p == '0') {
   1125          // 0 flag at the wrong place
   1126          semsg(_(e_invalid_format_specifier_str), fmt);
   1127          goto error;
   1128        }
   1129 
   1130        // Positional argument
   1131        unsigned uj;
   1132 
   1133        if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL) {
   1134          goto error;
   1135        }
   1136 
   1137        pos_arg = (int)uj;
   1138 
   1139        any_pos = 1;
   1140        CHECK_POS_ARG;
   1141 
   1142        p++;
   1143      }
   1144 
   1145      // parse flags
   1146      while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
   1147             || *p == '#' || *p == '\'') {
   1148        switch (*p) {
   1149        case '0':
   1150          break;
   1151        case '-':
   1152          break;
   1153        case '+':
   1154          break;
   1155        case ' ':  // If both the ' ' and '+' flags appear, the ' '
   1156                   // flag should be ignored
   1157          break;
   1158        case '#':
   1159          break;
   1160        case '\'':
   1161          break;
   1162        }
   1163        p++;
   1164      }
   1165      // If the '0' and '-' flags both appear, the '0' flag should be
   1166      // ignored.
   1167 
   1168      // parse field width
   1169      if (*(arg = p) == '*') {
   1170        p++;
   1171 
   1172        if (ascii_isdigit((int)(*p))) {
   1173          // Positional argument field width
   1174          unsigned uj;
   1175 
   1176          if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL) {
   1177            goto error;
   1178          }
   1179 
   1180          if (*p != '$') {
   1181            semsg(_(e_invalid_format_specifier_str), fmt);
   1182            goto error;
   1183          } else {
   1184            p++;
   1185            any_pos = 1;
   1186            CHECK_POS_ARG;
   1187 
   1188            if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) {
   1189              goto error;
   1190            }
   1191          }
   1192        } else {
   1193          any_arg = 1;
   1194          CHECK_POS_ARG;
   1195        }
   1196      } else if (ascii_isdigit((int)(*p))) {
   1197        // size_t could be wider than unsigned int; make sure we treat
   1198        // argument like common implementations do
   1199        const char *digstart = p;
   1200        unsigned uj;
   1201 
   1202        if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1203          goto error;
   1204        }
   1205 
   1206        if (*p == '$') {
   1207          semsg(_(e_invalid_format_specifier_str), fmt);
   1208          goto error;
   1209        }
   1210      }
   1211 
   1212      // parse precision
   1213      if (*p == '.') {
   1214        p++;
   1215 
   1216        if (*(arg = p) == '*') {
   1217          p++;
   1218 
   1219          if (ascii_isdigit((int)(*p))) {
   1220            // Parse precision
   1221            unsigned uj;
   1222 
   1223            if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL) {
   1224              goto error;
   1225            }
   1226 
   1227            if (*p == '$') {
   1228              any_pos = 1;
   1229              CHECK_POS_ARG;
   1230 
   1231              p++;
   1232 
   1233              if (adjust_types(ap_types, (int)uj, num_posarg, arg) == FAIL) {
   1234                goto error;
   1235              }
   1236            } else {
   1237              semsg(_(e_invalid_format_specifier_str), fmt);
   1238              goto error;
   1239            }
   1240          } else {
   1241            any_arg = 1;
   1242            CHECK_POS_ARG;
   1243          }
   1244        } else if (ascii_isdigit((int)(*p))) {
   1245          // size_t could be wider than unsigned int; make sure we
   1246          // treat argument like common implementations do
   1247          const char *digstart = p;
   1248          unsigned uj;
   1249 
   1250          if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1251            goto error;
   1252          }
   1253 
   1254          if (*p == '$') {
   1255            semsg(_(e_invalid_format_specifier_str), fmt);
   1256            goto error;
   1257          }
   1258        }
   1259      }
   1260 
   1261      if (pos_arg != -1) {
   1262        any_pos = 1;
   1263        CHECK_POS_ARG;
   1264 
   1265        ptype = p;
   1266      }
   1267 
   1268      // parse 'h', 'l', 'll' and 'z' length modifiers
   1269      if (*p == 'h' || *p == 'l' || *p == 'z') {
   1270        length_modifier = *p;
   1271        p++;
   1272        if (length_modifier == 'l' && *p == 'l') {
   1273          // double l = long long
   1274          // length_modifier = 'L';
   1275          p++;
   1276        }
   1277      }
   1278 
   1279      switch (*p) {
   1280      // Check for known format specifiers. % is special!
   1281      case 'i':
   1282      case '*':
   1283      case 'd':
   1284      case 'u':
   1285      case 'o':
   1286      case 'D':
   1287      case 'U':
   1288      case 'O':
   1289      case 'x':
   1290      case 'X':
   1291      case 'b':
   1292      case 'B':
   1293      case 'c':
   1294      case 's':
   1295      case 'S':
   1296      case 'p':
   1297      case 'f':
   1298      case 'F':
   1299      case 'e':
   1300      case 'E':
   1301      case 'g':
   1302      case 'G':
   1303        if (pos_arg != -1) {
   1304          if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL) {
   1305            goto error;
   1306          }
   1307        } else {
   1308          any_arg = 1;
   1309          CHECK_POS_ARG;
   1310        }
   1311        break;
   1312 
   1313      default:
   1314        if (pos_arg != -1) {
   1315          semsg(_(e_cannot_mix_positional_and_non_positional_str), fmt);
   1316          goto error;
   1317        }
   1318      }
   1319 
   1320      if (*p != NUL) {
   1321        p++;     // step over the just processed conversion specifier
   1322      }
   1323    }
   1324  }
   1325 
   1326  for (int arg_idx = 0; arg_idx < *num_posarg; arg_idx++) {
   1327    if ((*ap_types)[arg_idx] == NULL) {
   1328      semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
   1329      goto error;
   1330    }
   1331 
   1332    if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN) {
   1333      semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
   1334      goto error;
   1335    }
   1336  }
   1337 
   1338  return OK;
   1339 
   1340 error:
   1341  xfree(*ap_types);
   1342  *ap_types = NULL;
   1343  *num_posarg = 0;
   1344  return FAIL;
   1345 }
   1346 
   1347 static void skip_to_arg(const char **ap_types, va_list ap_start, va_list *ap, int *arg_idx,
   1348                        int *arg_cur, const char *fmt)
   1349  FUNC_ATTR_NONNULL_ARG(3, 4, 5)
   1350 {
   1351  int arg_min = 0;
   1352 
   1353  if (*arg_cur + 1 == *arg_idx) {
   1354    (*arg_cur)++;
   1355    (*arg_idx)++;
   1356    return;
   1357  }
   1358 
   1359  if (*arg_cur >= *arg_idx) {
   1360    // Reset ap to ap_start and skip arg_idx - 1 types
   1361    va_end(*ap);
   1362    va_copy(*ap, ap_start);
   1363  } else {
   1364    // Skip over any we should skip
   1365    arg_min = *arg_cur;
   1366  }
   1367 
   1368  for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; (*arg_cur)++) {
   1369    if (ap_types == NULL || ap_types[*arg_cur] == NULL) {
   1370      siemsg(e_aptypes_is_null_nr_str, fmt, *arg_cur);
   1371      return;
   1372    }
   1373 
   1374    const char *p = ap_types[*arg_cur];
   1375 
   1376    int fmt_type = format_typeof(p);
   1377 
   1378    // get parameter value, do initial processing
   1379    switch (fmt_type) {
   1380    case TYPE_PERCENT:
   1381    case TYPE_UNKNOWN:
   1382      break;
   1383 
   1384    case TYPE_CHAR:
   1385      va_arg(*ap, int);
   1386      break;
   1387 
   1388    case TYPE_STRING:
   1389      va_arg(*ap, const char *);
   1390      break;
   1391 
   1392    case TYPE_POINTER:
   1393      va_arg(*ap, void *);
   1394      break;
   1395 
   1396    case TYPE_INT:
   1397      va_arg(*ap, int);
   1398      break;
   1399 
   1400    case TYPE_LONGINT:
   1401      va_arg(*ap, long);
   1402      break;
   1403 
   1404    case TYPE_LONGLONGINT:
   1405      va_arg(*ap, long long);  // NOLINT(runtime/int)
   1406      break;
   1407 
   1408    case TYPE_SIGNEDSIZET:  // implementation-defined, usually ptrdiff_t
   1409      va_arg(*ap, ptrdiff_t);
   1410      break;
   1411 
   1412    case TYPE_UNSIGNEDINT:
   1413      va_arg(*ap, unsigned);
   1414      break;
   1415 
   1416    case TYPE_UNSIGNEDLONGINT:
   1417      va_arg(*ap, unsigned long);
   1418      break;
   1419 
   1420    case TYPE_UNSIGNEDLONGLONGINT:
   1421      va_arg(*ap, unsigned long long);  // NOLINT(runtime/int)
   1422      break;
   1423 
   1424    case TYPE_SIZET:
   1425      va_arg(*ap, size_t);
   1426      break;
   1427 
   1428    case TYPE_FLOAT:
   1429      va_arg(*ap, double);
   1430      break;
   1431    }
   1432  }
   1433 
   1434  // Because we know that after we return from this call,
   1435  // a va_arg() call is made, we can pre-emptively
   1436  // increment the current argument index.
   1437  (*arg_cur)++;
   1438  (*arg_idx)++;
   1439 }
   1440 
   1441 /// Write formatted value to the string
   1442 ///
   1443 /// @param[out]  str  String to write to.
   1444 /// @param[in]  str_m  String length.
   1445 /// @param[in]  fmt  String format.
   1446 /// @param[in]  ap  Values that should be formatted. Ignored if tvs is not NULL.
   1447 /// @param[in]  tvs  Values that should be formatted, for printf() Vimscript
   1448 ///                  function. Must be NULL in other cases.
   1449 ///
   1450 /// @return Number of bytes excluding NUL byte that would be written to the
   1451 ///         string if str_m was greater or equal to the return value.
   1452 int vim_vsnprintf_typval(char *str, size_t str_m, const char *fmt, va_list ap_start,
   1453                         typval_T *const tvs)
   1454 {
   1455  size_t str_l = 0;
   1456  bool str_avail = str_l < str_m;
   1457  const char *p = fmt;
   1458  int arg_cur = 0;
   1459  int num_posarg = 0;
   1460  int arg_idx = 1;
   1461  va_list ap;
   1462  const char **ap_types = NULL;
   1463 
   1464  if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL) {
   1465    return 0;
   1466  }
   1467 
   1468  va_copy(ap, ap_start);
   1469 
   1470  if (!p) {
   1471    p = "";
   1472  }
   1473  while (*p) {
   1474    if (*p != '%') {
   1475      // copy up to the next '%' or NUL without any changes
   1476      size_t n = (size_t)(xstrchrnul(p + 1, '%') - p);
   1477      if (str_avail) {
   1478        size_t avail = str_m - str_l;
   1479        memmove(str + str_l, p, MIN(n, avail));
   1480        str_avail = n < avail;
   1481      }
   1482      p += n;
   1483      assert(n <= SIZE_MAX - str_l);
   1484      str_l += n;
   1485    } else {
   1486      size_t min_field_width = 0;
   1487      size_t precision = 0;
   1488      bool zero_padding = false;
   1489      bool precision_specified = false;
   1490      bool justify_left = false;
   1491      bool alternate_form = false;
   1492      bool force_sign = false;
   1493 
   1494      // if both ' ' and '+' flags appear, ' ' flag should be ignored
   1495      int space_for_positive = 1;
   1496 
   1497      // allowed values: \0, h, l, 2 (for ll), z, L
   1498      char length_modifier = NUL;
   1499 
   1500      // temporary buffer for simple numeric->string conversion
   1501 #define TMP_LEN 350    // 1e308 seems reasonable as the maximum printable
   1502      char tmp[TMP_LEN];
   1503 
   1504      // string address in case of string argument
   1505      const char *str_arg = NULL;
   1506 
   1507      // natural field width of arg without padding and sign
   1508      size_t str_arg_l;
   1509 
   1510      // unsigned char argument value (only defined for c conversion);
   1511      // standard explicitly states the char argument for the c
   1512      // conversion is unsigned
   1513      unsigned char uchar_arg;
   1514 
   1515      // number of zeros to be inserted for numeric conversions as
   1516      // required by the precision or minimal field width
   1517      size_t number_of_zeros_to_pad = 0;
   1518 
   1519      // index into tmp where zero padding is to be inserted
   1520      size_t zero_padding_insertion_ind = 0;
   1521 
   1522      // current conversion specifier character
   1523      char fmt_spec = NUL;
   1524 
   1525      // buffer for 's' and 'S' specs
   1526      char *tofree = NULL;
   1527 
   1528      // variable for positional arg
   1529      int pos_arg = -1;
   1530 
   1531      p++;  // skip '%'
   1532 
   1533      // First check to see if we find a positional
   1534      // argument specifier
   1535      const char *ptype = p;
   1536 
   1537      while (ascii_isdigit(*ptype)) {
   1538        ptype++;
   1539      }
   1540 
   1541      if (*ptype == '$') {
   1542        // Positional argument
   1543        const char *digstart = p;
   1544        unsigned uj;
   1545 
   1546        if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1547          goto error;
   1548        }
   1549 
   1550        pos_arg = (int)uj;
   1551 
   1552        p++;
   1553      }
   1554 
   1555      // parse flags
   1556      while (true) {
   1557        switch (*p) {
   1558        case '0':
   1559          zero_padding = true; p++; continue;
   1560        case '-':
   1561          justify_left = true; p++; continue;
   1562        // if both '0' and '-' flags appear, '0' should be ignored
   1563        case '+':
   1564          force_sign = true; space_for_positive = 0; p++; continue;
   1565        case ' ':
   1566          force_sign = true; p++; continue;
   1567        // if both ' ' and '+' flags appear, ' ' should be ignored
   1568        case '#':
   1569          alternate_form = true; p++; continue;
   1570        case '\'':
   1571          p++; continue;
   1572        default:
   1573          break;
   1574        }
   1575        break;
   1576      }
   1577 
   1578      // parse field width
   1579      if (*p == '*') {
   1580        const char *digstart = p + 1;
   1581 
   1582        p++;
   1583 
   1584        if (ascii_isdigit((int)(*p))) {
   1585          // Positional argument field width
   1586          unsigned uj;
   1587 
   1588          if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1589            goto error;
   1590          }
   1591 
   1592          arg_idx = (int)uj;
   1593 
   1594          p++;
   1595        }
   1596 
   1597        int j = (tvs
   1598                 ? (int)tv_nr(tvs, &arg_idx)
   1599                 : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1600                                &arg_cur, fmt),
   1601                    va_arg(ap, int)));
   1602 
   1603        if (j > MAX_ALLOWED_STRING_WIDTH) {
   1604          if (tvs != NULL) {
   1605            format_overflow_error(digstart);
   1606            goto error;
   1607          } else {
   1608            j = MAX_ALLOWED_STRING_WIDTH;
   1609          }
   1610        }
   1611 
   1612        if (j >= 0) {
   1613          min_field_width = (size_t)j;
   1614        } else {
   1615          min_field_width = (size_t)-j;
   1616          justify_left = true;
   1617        }
   1618      } else if (ascii_isdigit((int)(*p))) {
   1619        // size_t could be wider than unsigned int; make sure we treat
   1620        // argument like common implementations do
   1621        const char *digstart = p;
   1622        unsigned uj;
   1623 
   1624        if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1625          goto error;
   1626        }
   1627 
   1628        min_field_width = uj;
   1629      }
   1630 
   1631      // parse precision
   1632      if (*p == '.') {
   1633        p++;
   1634        precision_specified = true;
   1635 
   1636        if (ascii_isdigit((int)(*p))) {
   1637          // size_t could be wider than unsigned int; make sure we
   1638          // treat argument like common implementations do
   1639          const char *digstart = p;
   1640          unsigned uj;
   1641 
   1642          if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1643            goto error;
   1644          }
   1645 
   1646          precision = uj;
   1647        } else if (*p == '*') {
   1648          const char *digstart = p;
   1649 
   1650          p++;
   1651 
   1652          if (ascii_isdigit((int)(*p))) {
   1653            // positional argument
   1654            unsigned uj;
   1655 
   1656            if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL) {
   1657              goto error;
   1658            }
   1659 
   1660            arg_idx = (int)uj;
   1661 
   1662            p++;
   1663          }
   1664 
   1665          int j = (tvs
   1666                   ? (int)tv_nr(tvs, &arg_idx)
   1667                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1668                                  &arg_cur, fmt),
   1669                      va_arg(ap, int)));
   1670 
   1671          if (j > MAX_ALLOWED_STRING_WIDTH) {
   1672            if (tvs != NULL) {
   1673              format_overflow_error(digstart);
   1674              goto error;
   1675            } else {
   1676              j = MAX_ALLOWED_STRING_WIDTH;
   1677            }
   1678          }
   1679 
   1680          if (j >= 0) {
   1681            precision = (size_t)j;
   1682          } else {
   1683            precision_specified = false;
   1684            precision = 0;
   1685          }
   1686        }
   1687      }
   1688 
   1689      // parse 'h', 'l', 'll' and 'z' length modifiers
   1690      if (*p == 'h' || *p == 'l' || *p == 'z') {
   1691        length_modifier = *p;
   1692        p++;
   1693        if (length_modifier == 'l' && *p == 'l') {
   1694          // double l = long long
   1695          length_modifier = 'L';
   1696          p++;
   1697        }
   1698      }
   1699 
   1700      fmt_spec = *p;
   1701 
   1702      // common synonyms
   1703      switch (fmt_spec) {
   1704      case 'i':
   1705        fmt_spec = 'd'; break;
   1706      case 'D':
   1707        fmt_spec = 'd'; length_modifier = 'l'; break;
   1708      case 'U':
   1709        fmt_spec = 'u'; length_modifier = 'l'; break;
   1710      case 'O':
   1711        fmt_spec = 'o'; length_modifier = 'l'; break;
   1712      default:
   1713        break;
   1714      }
   1715 
   1716      switch (fmt_spec) {
   1717      case 'd':
   1718      case 'u':
   1719      case 'o':
   1720      case 'x':
   1721      case 'X':
   1722        if (tvs && length_modifier == NUL) {
   1723          length_modifier = 'L';
   1724        }
   1725      }
   1726 
   1727      if (pos_arg != -1) {
   1728        arg_idx = pos_arg;
   1729      }
   1730 
   1731      // get parameter value, do initial processing
   1732      switch (fmt_spec) {
   1733      // '%' and 'c' behave similar to 's' regarding flags and field widths
   1734      case '%':
   1735      case 'c':
   1736      case 's':
   1737      case 'S':
   1738        str_arg_l = 1;
   1739        switch (fmt_spec) {
   1740        case '%':
   1741          str_arg = p;
   1742          break;
   1743 
   1744        case 'c': {
   1745          const int j = (tvs
   1746                         ? (int)tv_nr(tvs, &arg_idx)
   1747                         : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1748                                        &arg_cur, fmt),
   1749                            va_arg(ap, int)));
   1750 
   1751          // standard demands unsigned char
   1752          uchar_arg = (unsigned char)j;
   1753          str_arg = (char *)&uchar_arg;
   1754          break;
   1755        }
   1756 
   1757        case 's':
   1758        case 'S':
   1759          str_arg = (tvs
   1760                     ? tv_str(tvs, &arg_idx, &tofree)
   1761                     : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1762                                    &arg_cur, fmt),
   1763                        va_arg(ap, const char *)));
   1764 
   1765          if (!str_arg) {
   1766            str_arg = "[NULL]";
   1767            str_arg_l = 6;
   1768          } else if (!precision_specified) {
   1769            // make sure not to address string beyond the specified
   1770            // precision
   1771            str_arg_l = strlen(str_arg);
   1772          } else if (precision == 0) {
   1773            // truncate string if necessary as requested by precision
   1774            str_arg_l = 0;
   1775          } else {
   1776            // memchr on HP does not like n > 2^31
   1777            // TODO(elmart): check if this still holds / is relevant
   1778            str_arg_l = (size_t)((char *)xmemscan(str_arg,
   1779                                                  NUL,
   1780                                                  MIN(precision,
   1781                                                      0x7fffffff))
   1782                                 - str_arg);
   1783          }
   1784          if (fmt_spec == 'S') {
   1785            const char *p1;
   1786            size_t i;
   1787 
   1788            for (i = 0, p1 = str_arg; *p1; p1 += utfc_ptr2len(p1)) {
   1789              size_t cell = (size_t)utf_ptr2cells(p1);
   1790              if (precision_specified && i + cell > precision) {
   1791                break;
   1792              }
   1793              i += cell;
   1794            }
   1795 
   1796            str_arg_l = (size_t)(p1 - str_arg);
   1797            if (min_field_width != 0) {
   1798              min_field_width += str_arg_l - i;
   1799            }
   1800          }
   1801          break;
   1802 
   1803        default:
   1804          break;
   1805        }
   1806        break;
   1807 
   1808      case 'd':
   1809      case 'u':
   1810      case 'b':
   1811      case 'B':
   1812      case 'o':
   1813      case 'x':
   1814      case 'X':
   1815      case 'p': {
   1816        // u, b, B, o, x, X and p conversion specifiers imply
   1817        // the value is unsigned; d implies a signed value
   1818 
   1819        // 0 if numeric argument is zero (or if pointer is NULL for 'p'),
   1820        // +1 if greater than zero (or non NULL for 'p'),
   1821        // -1 if negative (unsigned argument is never negative)
   1822        int arg_sign = 0;
   1823 
   1824        intmax_t arg = 0;
   1825        uintmax_t uarg = 0;
   1826 
   1827        // only defined for p conversion
   1828        const void *ptr_arg = NULL;
   1829 
   1830        if (fmt_spec == 'p') {
   1831          ptr_arg = (tvs
   1832                     ? tv_ptr(tvs, &arg_idx)
   1833                     : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1834                                    &arg_cur, fmt),
   1835                        va_arg(ap, void *)));
   1836 
   1837          if (ptr_arg) {
   1838            arg_sign = 1;
   1839          }
   1840        } else if (fmt_spec == 'b' || fmt_spec == 'B') {
   1841          uarg = (tvs
   1842                  ? (unsigned long long)tv_nr(tvs, &arg_idx)  // NOLINT(runtime/int)
   1843                  : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1844                                 &arg_cur, fmt),
   1845                     va_arg(ap, unsigned long long)));  // NOLINT(runtime/int)
   1846          arg_sign = (uarg != 0);
   1847        } else if (fmt_spec == 'd') {
   1848          // signed
   1849          switch (length_modifier) {
   1850          case NUL:
   1851            arg = (tvs
   1852                   ? (int)tv_nr(tvs, &arg_idx)
   1853                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1854                                  &arg_cur, fmt),
   1855                      va_arg(ap, int)));
   1856            break;
   1857          case 'h':
   1858            // char and short arguments are passed as int16_t
   1859            arg = (int16_t)
   1860                  (tvs
   1861                   ? (int)tv_nr(tvs, &arg_idx)
   1862                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1863                                  &arg_cur, fmt),
   1864                      va_arg(ap, int)));
   1865            break;
   1866          case 'l':
   1867            arg = (tvs
   1868                   ? (long)tv_nr(tvs, &arg_idx)
   1869                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1870                                  &arg_cur, fmt),
   1871                      va_arg(ap, long)));
   1872            break;
   1873          case 'L':
   1874            arg = (tvs
   1875                   ? (long long)tv_nr(tvs, &arg_idx)  // NOLINT(runtime/int)
   1876                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1877                                  &arg_cur, fmt),
   1878                      va_arg(ap, long long)));  // NOLINT(runtime/int)
   1879            break;
   1880          case 'z':  // implementation-defined, usually ptrdiff_t
   1881            arg = (tvs
   1882                   ? (ptrdiff_t)tv_nr(tvs, &arg_idx)
   1883                   : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1884                                  &arg_cur, fmt),
   1885                      va_arg(ap, ptrdiff_t)));
   1886            break;
   1887          }
   1888          if (arg > 0) {
   1889            arg_sign = 1;
   1890          } else if (arg < 0) {
   1891            arg_sign = -1;
   1892          }
   1893        } else {
   1894          // unsigned
   1895          switch (length_modifier) {
   1896          case NUL:
   1897            uarg = (tvs
   1898                    ? (unsigned)tv_nr(tvs, &arg_idx)
   1899                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1900                                   &arg_cur, fmt),
   1901                       va_arg(ap, unsigned)));
   1902            break;
   1903          case 'h':
   1904            uarg = (uint16_t)
   1905                   (tvs
   1906                    ? (unsigned)tv_nr(tvs, &arg_idx)
   1907                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1908                                   &arg_cur, fmt),
   1909                       va_arg(ap, unsigned)));
   1910            break;
   1911          case 'l':
   1912            uarg = (tvs
   1913                    ? (unsigned long)tv_nr(tvs, &arg_idx)
   1914                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1915                                   &arg_cur, fmt),
   1916                       va_arg(ap, unsigned long)));
   1917            break;
   1918          case 'L':
   1919            uarg = (tvs
   1920                    ? (unsigned long long)tv_nr(tvs, &arg_idx)  // NOLINT(runtime/int)
   1921                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1922                                   &arg_cur, fmt),
   1923                       va_arg(ap, unsigned long long)));  // NOLINT(runtime/int)
   1924            break;
   1925          case 'z':
   1926            uarg = (tvs
   1927                    ? (size_t)tv_nr(tvs, &arg_idx)
   1928                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   1929                                   &arg_cur, fmt),
   1930                       va_arg(ap, size_t)));
   1931            break;
   1932          }
   1933          arg_sign = (uarg != 0);
   1934        }
   1935 
   1936        str_arg = tmp;
   1937        str_arg_l = 0;
   1938 
   1939        // For d, i, u, o, x, and X conversions, if precision is specified,
   1940        // '0' flag should be ignored. This is so with Solaris 2.6, Digital
   1941        // UNIX 4.0, HPUX 10, Linux, FreeBSD, NetBSD; but not with Perl.
   1942        if (precision_specified) {
   1943          zero_padding = false;
   1944        }
   1945 
   1946        if (fmt_spec == 'd') {
   1947          if (force_sign && arg_sign >= 0) {
   1948            tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
   1949          }
   1950          // leave negative numbers for snprintf to handle, to
   1951          // avoid handling tricky cases like (short int)-32768
   1952        } else if (alternate_form) {
   1953          if (arg_sign != 0 && (fmt_spec == 'x' || fmt_spec == 'X'
   1954                                || fmt_spec == 'b' || fmt_spec == 'B')) {
   1955            tmp[str_arg_l++] = '0';
   1956            tmp[str_arg_l++] = fmt_spec;
   1957          }
   1958          // alternate form should have no effect for p * conversion, but ...
   1959        }
   1960 
   1961        zero_padding_insertion_ind = str_arg_l;
   1962        if (!precision_specified) {
   1963          precision = 1;  // default precision is 1
   1964        }
   1965        if (precision == 0 && arg_sign == 0) {
   1966          // when zero value is formatted with an explicit precision 0,
   1967          // resulting formatted string is empty (d, i, u, b, B, o, x, X, p)
   1968        } else {
   1969          switch (fmt_spec) {
   1970          case 'p':    // pointer
   1971            str_arg_l += (size_t)snprintf(tmp + str_arg_l,
   1972                                          sizeof(tmp) - str_arg_l,
   1973                                          "%p", ptr_arg);
   1974            break;
   1975          case 'd':    // signed
   1976            str_arg_l += (size_t)snprintf(tmp + str_arg_l,
   1977                                          sizeof(tmp) - str_arg_l,
   1978                                          "%" PRIdMAX, arg);
   1979            break;
   1980          case 'b':
   1981          case 'B': {  // binary
   1982            size_t bits = 0;
   1983            for (bits = sizeof(uintmax_t) * 8; bits > 0; bits--) {
   1984              if ((uarg >> (bits - 1)) & 0x1) {
   1985                break;
   1986              }
   1987            }
   1988 
   1989            while (bits > 0) {
   1990              tmp[str_arg_l++] = ((uarg >> --bits) & 0x1) ? '1' : '0';
   1991            }
   1992            break;
   1993          }
   1994          default: {  // unsigned
   1995            // construct a simple format string for snprintf
   1996            char f[] = "%" PRIuMAX;
   1997            f[sizeof("%" PRIuMAX) - 1 - 1] = fmt_spec;
   1998            assert(PRIuMAX[sizeof(PRIuMAX) - 1 - 1] == 'u');
   1999            str_arg_l += (size_t)snprintf(tmp + str_arg_l,
   2000                                          sizeof(tmp) - str_arg_l,
   2001                                          f, uarg);
   2002            break;
   2003          }
   2004          }
   2005          assert(str_arg_l < sizeof(tmp));
   2006 
   2007          // include the optional minus sign and possible "0x" in the region
   2008          // before the zero padding insertion point
   2009          if (zero_padding_insertion_ind < str_arg_l
   2010              && tmp[zero_padding_insertion_ind] == '-') {
   2011            zero_padding_insertion_ind++;
   2012          }
   2013          if (zero_padding_insertion_ind + 1 < str_arg_l
   2014              && tmp[zero_padding_insertion_ind] == '0'
   2015              && (tmp[zero_padding_insertion_ind + 1] == 'x'
   2016                  || tmp[zero_padding_insertion_ind + 1] == 'X'
   2017                  || tmp[zero_padding_insertion_ind + 1] == 'b'
   2018                  || tmp[zero_padding_insertion_ind + 1] == 'B')) {
   2019            zero_padding_insertion_ind += 2;
   2020          }
   2021        }
   2022 
   2023        {
   2024          size_t num_of_digits = str_arg_l - zero_padding_insertion_ind;
   2025 
   2026          if (alternate_form && fmt_spec == 'o'
   2027              // unless zero is already the first character
   2028              && !(zero_padding_insertion_ind < str_arg_l
   2029                   && tmp[zero_padding_insertion_ind] == '0')) {
   2030            // assure leading zero for alternate-form octal numbers
   2031            if (!precision_specified || precision < num_of_digits + 1) {
   2032              // precision is increased to force the first character to be
   2033              // zero, except if a zero value is formatted with an explicit
   2034              // precision of zero
   2035              precision = num_of_digits + 1;
   2036            }
   2037          }
   2038          // zero padding to specified precision?
   2039          if (num_of_digits < precision) {
   2040            number_of_zeros_to_pad = precision - num_of_digits;
   2041          }
   2042        }
   2043        // zero padding to specified minimal field width?
   2044        if (!justify_left && zero_padding) {
   2045          const int n = (int)(min_field_width - (str_arg_l
   2046                                                 + number_of_zeros_to_pad));
   2047          if (n > 0) {
   2048            number_of_zeros_to_pad += (size_t)n;
   2049          }
   2050        }
   2051        break;
   2052      }
   2053 
   2054      case 'f':
   2055      case 'F':
   2056      case 'e':
   2057      case 'E':
   2058      case 'g':
   2059      case 'G': {
   2060        // floating point
   2061        char format[40];
   2062        bool remove_trailing_zeroes = false;
   2063 
   2064        double f = (tvs
   2065                    ? tv_float(tvs, &arg_idx)
   2066                    : (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
   2067                                   &arg_cur, fmt),
   2068                       va_arg(ap, double)));
   2069 
   2070        double abs_f = f < 0 ? -f : f;
   2071 
   2072        if (fmt_spec == 'g' || fmt_spec == 'G') {
   2073          // can't use %g directly, cause it prints "1.0" as "1"
   2074          if ((abs_f >= 0.001 && abs_f < 10000000.0) || abs_f == 0.0) {
   2075            fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
   2076          } else {
   2077            fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
   2078          }
   2079          remove_trailing_zeroes = true;
   2080        }
   2081 
   2082        if (xisinf(f)
   2083            || (strchr("fF", fmt_spec) != NULL && abs_f > 1.0e307)) {
   2084          xstrlcpy(tmp, infinity_str(f > 0.0, fmt_spec,
   2085                                     force_sign, space_for_positive),
   2086                   sizeof(tmp));
   2087          str_arg_l = strlen(tmp);
   2088          zero_padding = false;
   2089        } else if (xisnan(f)) {
   2090          // Not a number: nan or NAN
   2091          memmove(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN" : "nan", 4);
   2092          str_arg_l = 3;
   2093          zero_padding = false;
   2094        } else {
   2095          // Regular float number
   2096          format[0] = '%';
   2097          size_t l = 1;
   2098          if (force_sign) {
   2099            format[l++] = space_for_positive ? ' ' : '+';
   2100          }
   2101          if (precision_specified) {
   2102            size_t max_prec = TMP_LEN - 10;
   2103 
   2104            // make sure we don't get more digits than we have room for
   2105            if ((fmt_spec == 'f' || fmt_spec == 'F') && abs_f > 1.0) {
   2106              max_prec -= (size_t)log10(abs_f);
   2107            }
   2108            if (precision > max_prec) {
   2109              precision = max_prec;
   2110            }
   2111            l += (size_t)snprintf(format + l, sizeof(format) - l, ".%d",
   2112                                  (int)precision);
   2113          }
   2114 
   2115          // Cast to char to avoid a conversion warning on Ubuntu 12.04.
   2116          assert(l + 1 < sizeof(format));
   2117          format[l] = (char)(fmt_spec == 'F' ? 'f' : fmt_spec);
   2118          format[l + 1] = NUL;
   2119 
   2120          str_arg_l = (size_t)snprintf(tmp, sizeof(tmp), format, f);
   2121          assert(str_arg_l < sizeof(tmp));
   2122 
   2123          if (remove_trailing_zeroes) {
   2124            char *tp;
   2125 
   2126            // using %g or %G: remove superfluous zeroes
   2127            if (fmt_spec == 'f' || fmt_spec == 'F') {
   2128              tp = tmp + str_arg_l - 1;
   2129            } else {
   2130              tp = vim_strchr(tmp, fmt_spec == 'e' ? 'e' : 'E');
   2131              if (tp) {
   2132                // remove superfluous '+' and leading zeroes from exponent
   2133                if (tp[1] == '+') {
   2134                  // change "1.0e+07" to "1.0e07"
   2135                  STRMOVE(tp + 1, tp + 2);
   2136                  str_arg_l--;
   2137                }
   2138                int i = (tp[1] == '-') ? 2 : 1;
   2139                while (tp[i] == '0') {
   2140                  // change "1.0e07" to "1.0e7"
   2141                  STRMOVE(tp + i, tp + i + 1);
   2142                  str_arg_l--;
   2143                }
   2144                tp--;
   2145              }
   2146            }
   2147 
   2148            if (tp != NULL && !precision_specified) {
   2149              // remove trailing zeroes, but keep the one just after a dot
   2150              while (tp > tmp + 2 && *tp == '0' && tp[-1] != '.') {
   2151                STRMOVE(tp, tp + 1);
   2152                tp--;
   2153                str_arg_l--;
   2154              }
   2155            }
   2156          } else {
   2157            // Be consistent: some printf("%e") use 1.0e+12 and some
   2158            // 1.0e+012; remove one zero in the last case.
   2159            char *tp = vim_strchr(tmp, fmt_spec == 'e' ? 'e' : 'E');
   2160            if (tp && (tp[1] == '+' || tp[1] == '-') && tp[2] == '0'
   2161                && ascii_isdigit(tp[3]) && ascii_isdigit(tp[4])) {
   2162              STRMOVE(tp + 2, tp + 3);
   2163              str_arg_l--;
   2164            }
   2165          }
   2166        }
   2167        if (zero_padding && min_field_width > str_arg_l
   2168            && (tmp[0] == '-' || force_sign)) {
   2169          // Padding 0's should be inserted after the sign.
   2170          number_of_zeros_to_pad = min_field_width - str_arg_l;
   2171          zero_padding_insertion_ind = 1;
   2172        }
   2173        str_arg = tmp;
   2174        break;
   2175      }
   2176 
   2177      default:
   2178        // unrecognized conversion specifier, keep format string as-is
   2179        zero_padding = false;  // turn zero padding off for non-numeric conversion
   2180        justify_left = true;
   2181        min_field_width = 0;  // reset flags
   2182 
   2183        // discard the unrecognized conversion, just keep
   2184        // the unrecognized conversion character
   2185        str_arg = p;
   2186        str_arg_l = 0;
   2187        if (*p) {
   2188          str_arg_l++;  // include invalid conversion specifier
   2189        }
   2190        // unchanged if not at end-of-string
   2191        break;
   2192      }
   2193 
   2194      if (*p) {
   2195        p++;  // step over the just processed conversion specifier
   2196      }
   2197 
   2198      // insert padding to the left as requested by min_field_width;
   2199      // this does not include the zero padding in case of numerical conversions
   2200      if (!justify_left) {
   2201        assert(str_arg_l <= SIZE_MAX - number_of_zeros_to_pad);
   2202        if (min_field_width > str_arg_l + number_of_zeros_to_pad) {
   2203          // left padding with blank or zero
   2204          size_t pn = min_field_width - (str_arg_l + number_of_zeros_to_pad);
   2205          if (str_avail) {
   2206            size_t avail = str_m - str_l;
   2207            memset(str + str_l, zero_padding ? '0' : ' ', MIN(pn, avail));
   2208            str_avail = pn < avail;
   2209          }
   2210          assert(pn <= SIZE_MAX - str_l);
   2211          str_l += pn;
   2212        }
   2213      }
   2214 
   2215      // zero padding as requested by the precision or by the minimal
   2216      // field width for numeric conversions required?
   2217      if (number_of_zeros_to_pad == 0) {
   2218        // will not copy first part of numeric right now,
   2219        // force it to be copied later in its entirety
   2220        zero_padding_insertion_ind = 0;
   2221      } else {
   2222        // insert first part of numerics (sign or '0x') before zero padding
   2223        if (zero_padding_insertion_ind > 0) {
   2224          size_t zn = zero_padding_insertion_ind;
   2225          if (str_avail) {
   2226            size_t avail = str_m - str_l;
   2227            memmove(str + str_l, str_arg, MIN(zn, avail));
   2228            str_avail = zn < avail;
   2229          }
   2230          assert(zn <= SIZE_MAX - str_l);
   2231          str_l += zn;
   2232        }
   2233 
   2234        // insert zero padding as requested by precision or min field width
   2235        size_t zn = number_of_zeros_to_pad;
   2236        if (str_avail) {
   2237          size_t avail = str_m - str_l;
   2238          memset(str + str_l, '0', MIN(zn, avail));
   2239          str_avail = zn < avail;
   2240        }
   2241        assert(zn <= SIZE_MAX - str_l);
   2242        str_l += zn;
   2243      }
   2244 
   2245      // insert formatted string
   2246      // (or as-is conversion specifier for unknown conversions)
   2247      if (str_arg_l > zero_padding_insertion_ind) {
   2248        size_t sn = str_arg_l - zero_padding_insertion_ind;
   2249        if (str_avail) {
   2250          size_t avail = str_m - str_l;
   2251          memmove(str + str_l,
   2252                  str_arg + zero_padding_insertion_ind,
   2253                  MIN(sn, avail));
   2254          str_avail = sn < avail;
   2255        }
   2256        assert(sn <= SIZE_MAX - str_l);
   2257        str_l += sn;
   2258      }
   2259 
   2260      // insert right padding
   2261      if (justify_left) {
   2262        assert(str_arg_l <= SIZE_MAX - number_of_zeros_to_pad);
   2263        if (min_field_width > str_arg_l + number_of_zeros_to_pad) {
   2264          // right blank padding to the field width
   2265          size_t pn = min_field_width - (str_arg_l + number_of_zeros_to_pad);
   2266          if (str_avail) {
   2267            size_t avail = str_m - str_l;
   2268            memset(str + str_l, ' ', MIN(pn, avail));
   2269            str_avail = pn < avail;
   2270          }
   2271          assert(pn <= SIZE_MAX - str_l);
   2272          str_l += pn;
   2273        }
   2274      }
   2275 
   2276      xfree(tofree);
   2277    }
   2278  }
   2279 
   2280  if (str_m > 0) {
   2281    // make sure the string is nul-terminated even at the expense of
   2282    // overwriting the last character (shouldn't happen, but just in case)
   2283    str[str_l <= str_m - 1 ? str_l : str_m - 1] = NUL;
   2284  }
   2285 
   2286  if (tvs != NULL
   2287      && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN) {
   2288    emsg(_("E767: Too many arguments to printf()"));
   2289  }
   2290 
   2291 error:
   2292  xfree(ap_types);
   2293  va_end(ap);
   2294 
   2295  // return the number of characters formatted (excluding trailing nul
   2296  // character); that is, the number of characters that would have been
   2297  // written to the buffer if it were large enough.
   2298  return (int)str_l;
   2299 }
   2300 
   2301 int kv_do_printf(StringBuilder *str, const char *fmt, ...)
   2302  FUNC_ATTR_PRINTF(2, 3)
   2303 {
   2304  size_t remaining = str->capacity - str->size;
   2305 
   2306  va_list ap;
   2307  va_start(ap, fmt);
   2308  int printed = vsnprintf(str->items ? str->items + str->size : NULL, remaining, fmt, ap);
   2309  va_end(ap);
   2310 
   2311  if (printed < 0) {
   2312    return -1;
   2313  }
   2314 
   2315  // printed string didn't fit, resize and try again
   2316  if ((size_t)printed >= remaining) {
   2317    kv_ensure_space(*str, (size_t)printed + 1);  // include space for NUL terminator at the end
   2318    assert(str->items != NULL);
   2319    va_start(ap, fmt);
   2320    printed = vsnprintf(str->items + str->size, str->capacity - str->size, fmt, ap);
   2321    va_end(ap);
   2322    if (printed < 0) {
   2323      return -1;
   2324    }
   2325  }
   2326 
   2327  str->size += (size_t)printed;
   2328  return printed;
   2329 }
   2330 
   2331 String arena_printf(Arena *arena, const char *fmt, ...)
   2332  FUNC_ATTR_PRINTF(2, 3)
   2333 {
   2334  size_t remaining = 0;
   2335  char *buf = NULL;
   2336  if (arena) {
   2337    if (!arena->cur_blk) {
   2338      arena_alloc_block(arena);
   2339    }
   2340 
   2341    // happy case, we can fit the printed string in the rest of the current
   2342    // block (one pass):
   2343    remaining = arena->size - arena->pos;
   2344    buf = arena->cur_blk + arena->pos;
   2345  }
   2346 
   2347  va_list ap;
   2348  va_start(ap, fmt);
   2349  int printed = vsnprintf(buf, remaining, fmt, ap);
   2350  va_end(ap);
   2351 
   2352  if (printed < 0) {
   2353    return (String)STRING_INIT;
   2354  }
   2355 
   2356  // printed string didn't fit, allocate and try again
   2357  if ((size_t)printed >= remaining) {
   2358    buf = arena_alloc(arena, (size_t)printed + 1, false);
   2359    va_start(ap, fmt);
   2360    printed = vsnprintf(buf, (size_t)printed + 1, fmt, ap);
   2361    va_end(ap);
   2362    if (printed < 0) {
   2363      return (String)STRING_INIT;
   2364    }
   2365  } else {
   2366    arena->pos += (size_t)printed + 1;
   2367  }
   2368 
   2369  return cbuf_as_string(buf, (size_t)printed);
   2370 }
   2371 
   2372 /// Reverse text into allocated memory.
   2373 ///
   2374 /// @return  the allocated string.
   2375 char *reverse_text(char *s)
   2376  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET
   2377 {
   2378  size_t len = strlen(s);
   2379  char *rev = xmalloc(len + 1);
   2380  for (size_t s_i = 0, rev_i = len; s_i < len; s_i++) {
   2381    const int mb_len = utfc_ptr2len(s + s_i);
   2382    rev_i -= (size_t)mb_len;
   2383    memmove(rev + rev_i, s + s_i, (size_t)mb_len);
   2384    s_i += (size_t)mb_len - 1;
   2385  }
   2386  rev[len] = NUL;
   2387  return rev;
   2388 }
   2389 
   2390 /// Replace all occurrences of "what" with "rep" in "src". If no replacement happens then NULL is
   2391 /// returned otherwise return a newly allocated string.
   2392 ///
   2393 /// @param[in] src  Source text
   2394 /// @param[in] what Substring to replace
   2395 /// @param[in] rep  Substring to replace with
   2396 ///
   2397 /// @return [allocated] Copy of the string.
   2398 char *strrep(const char *src, const char *what, const char *rep)
   2399 {
   2400  const char *pos = src;
   2401  size_t whatlen = strlen(what);
   2402 
   2403  // Count occurrences
   2404  size_t count = 0;
   2405  while ((pos = strstr(pos, what)) != NULL) {
   2406    count++;
   2407    pos += whatlen;
   2408  }
   2409 
   2410  if (count == 0) {
   2411    return NULL;
   2412  }
   2413 
   2414  size_t replen = strlen(rep);
   2415  char *ret = xmalloc(strlen(src) + count * (replen - whatlen) + 1);
   2416  char *ptr = ret;
   2417  while ((pos = strstr(src, what)) != NULL) {
   2418    size_t idx = (size_t)(pos - src);
   2419    memcpy(ptr, src, idx);
   2420    ptr += idx;
   2421    STRCPY(ptr, rep);
   2422    ptr += replen;
   2423    src = pos + whatlen;
   2424  }
   2425 
   2426  // Copy remaining
   2427  STRCPY(ptr, src);
   2428 
   2429  return ret;
   2430 }
   2431 
   2432 /// Implementation of "byteidx()" and "byteidxcomp()" functions
   2433 static void byteidx_common(typval_T *argvars, typval_T *rettv, bool comp)
   2434 {
   2435  rettv->vval.v_number = -1;
   2436 
   2437  const char *const str = tv_get_string_chk(&argvars[0]);
   2438  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
   2439  if (str == NULL || idx < 0) {
   2440    return;
   2441  }
   2442 
   2443  varnumber_T utf16idx = false;
   2444  if (argvars[2].v_type != VAR_UNKNOWN) {
   2445    bool error = false;
   2446    utf16idx = tv_get_bool_chk(&argvars[2], &error);
   2447    if (error) {
   2448      return;
   2449    }
   2450    if (utf16idx < 0 || utf16idx > 1) {
   2451      semsg(_(e_using_number_as_bool_nr), utf16idx);
   2452      return;
   2453    }
   2454  }
   2455 
   2456  int (*ptr2len)(const char *);
   2457  if (comp) {
   2458    ptr2len = utf_ptr2len;
   2459  } else {
   2460    ptr2len = utfc_ptr2len;
   2461  }
   2462 
   2463  const char *t = str;
   2464  for (; idx > 0; idx--) {
   2465    if (*t == NUL) {  // EOL reached.
   2466      return;
   2467    }
   2468    if (utf16idx) {
   2469      const int clen = ptr2len(t);
   2470      const int c = (clen > 1) ? utf_ptr2char(t) : *t;
   2471      if (c > 0xFFFF) {
   2472        idx--;
   2473      }
   2474    }
   2475    if (idx > 0) {
   2476      t += ptr2len(t);
   2477    }
   2478  }
   2479  rettv->vval.v_number = (varnumber_T)(t - str);
   2480 }
   2481 
   2482 /// "byteidx()" function
   2483 void f_byteidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2484 {
   2485  byteidx_common(argvars, rettv, false);
   2486 }
   2487 
   2488 /// "byteidxcomp()" function
   2489 void f_byteidxcomp(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2490 {
   2491  byteidx_common(argvars, rettv, true);
   2492 }
   2493 
   2494 /// "charidx()" function
   2495 void f_charidx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2496 {
   2497  rettv->vval.v_number = -1;
   2498 
   2499  if (tv_check_for_string_arg(argvars, 0) == FAIL
   2500      || tv_check_for_number_arg(argvars, 1) == FAIL
   2501      || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
   2502      || (argvars[2].v_type != VAR_UNKNOWN
   2503          && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
   2504    return;
   2505  }
   2506 
   2507  const char *const str = tv_get_string_chk(&argvars[0]);
   2508  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
   2509  if (str == NULL || idx < 0) {
   2510    return;
   2511  }
   2512 
   2513  varnumber_T countcc = false;
   2514  varnumber_T utf16idx = false;
   2515  if (argvars[2].v_type != VAR_UNKNOWN) {
   2516    countcc = tv_get_bool(&argvars[2]);
   2517    if (argvars[3].v_type != VAR_UNKNOWN) {
   2518      utf16idx = tv_get_bool(&argvars[3]);
   2519    }
   2520  }
   2521 
   2522  int (*ptr2len)(const char *);
   2523  if (countcc) {
   2524    ptr2len = utf_ptr2len;
   2525  } else {
   2526    ptr2len = utfc_ptr2len;
   2527  }
   2528 
   2529  const char *p;
   2530  int len;
   2531  for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) {
   2532    if (*p == NUL) {
   2533      // If the index is exactly the number of bytes or utf-16 code units
   2534      // in the string then return the length of the string in characters.
   2535      if (utf16idx ? (idx == 0) : (p == (str + idx))) {
   2536        rettv->vval.v_number = len;
   2537      }
   2538      return;
   2539    }
   2540    if (utf16idx) {
   2541      idx--;
   2542      const int clen = ptr2len(p);
   2543      const int c = (clen > 1) ? utf_ptr2char(p) : *p;
   2544      if (c > 0xFFFF) {
   2545        idx--;
   2546      }
   2547    }
   2548    p += ptr2len(p);
   2549  }
   2550 
   2551  rettv->vval.v_number = len > 0 ? len - 1 : 0;
   2552 }
   2553 
   2554 /// "str2list()" function
   2555 void f_str2list(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2556 {
   2557  tv_list_alloc_ret(rettv, kListLenUnknown);
   2558  const char *p = tv_get_string(&argvars[0]);
   2559 
   2560  for (; *p != NUL; p += utf_ptr2len(p)) {
   2561    tv_list_append_number(rettv->vval.v_list, utf_ptr2char(p));
   2562  }
   2563 }
   2564 
   2565 /// "str2nr()" function
   2566 void f_str2nr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2567 {
   2568  int base = 10;
   2569  int what = 0;
   2570 
   2571  if (argvars[1].v_type != VAR_UNKNOWN) {
   2572    base = (int)tv_get_number(&argvars[1]);
   2573    if (base != 2 && base != 8 && base != 10 && base != 16) {
   2574      emsg(_(e_invarg));
   2575      return;
   2576    }
   2577    if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2])) {
   2578      what |= STR2NR_QUOTE;
   2579    }
   2580  }
   2581 
   2582  char *p = skipwhite(tv_get_string(&argvars[0]));
   2583  bool isneg = (*p == '-');
   2584  if (*p == '+' || *p == '-') {
   2585    p = skipwhite(p + 1);
   2586  }
   2587  switch (base) {
   2588  case 2:
   2589    what |= STR2NR_BIN | STR2NR_FORCE;
   2590    break;
   2591  case 8:
   2592    what |= STR2NR_OCT | STR2NR_OOCT | STR2NR_FORCE;
   2593    break;
   2594  case 16:
   2595    what |= STR2NR_HEX | STR2NR_FORCE;
   2596    break;
   2597  }
   2598  varnumber_T n;
   2599  vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, false, NULL);
   2600  // Text after the number is silently ignored.
   2601  if (isneg) {
   2602    rettv->vval.v_number = -n;
   2603  } else {
   2604    rettv->vval.v_number = n;
   2605  }
   2606 }
   2607 
   2608 /// "strgetchar()" function
   2609 void f_strgetchar(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2610 {
   2611  rettv->vval.v_number = -1;
   2612 
   2613  const char *const str = tv_get_string_chk(&argvars[0]);
   2614  if (str == NULL) {
   2615    return;
   2616  }
   2617  bool error = false;
   2618  varnumber_T charidx = tv_get_number_chk(&argvars[1], &error);
   2619  if (error) {
   2620    return;
   2621  }
   2622 
   2623  const size_t len = strlen(str);
   2624  size_t byteidx = 0;
   2625 
   2626  while (charidx >= 0 && byteidx < len) {
   2627    if (charidx == 0) {
   2628      rettv->vval.v_number = utf_ptr2char(str + byteidx);
   2629      break;
   2630    }
   2631    charidx--;
   2632    byteidx += (size_t)utf_ptr2len(str + byteidx);
   2633  }
   2634 }
   2635 
   2636 /// "stridx()" function
   2637 void f_stridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2638 {
   2639  rettv->vval.v_number = -1;
   2640 
   2641  char buf[NUMBUFLEN];
   2642  const char *const needle = tv_get_string_chk(&argvars[1]);
   2643  const char *haystack = tv_get_string_buf_chk(&argvars[0], buf);
   2644  const char *const haystack_start = haystack;
   2645  if (needle == NULL || haystack == NULL) {
   2646    return;  // Type error; errmsg already given.
   2647  }
   2648 
   2649  if (argvars[2].v_type != VAR_UNKNOWN) {
   2650    bool error = false;
   2651 
   2652    const ptrdiff_t start_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2],
   2653                                                             &error);
   2654    if (error || start_idx >= (ptrdiff_t)strlen(haystack)) {
   2655      return;
   2656    }
   2657    if (start_idx >= 0) {
   2658      haystack += start_idx;
   2659    }
   2660  }
   2661 
   2662  const char *pos = strstr(haystack, needle);
   2663  if (pos != NULL) {
   2664    rettv->vval.v_number = (varnumber_T)(pos - haystack_start);
   2665  }
   2666 }
   2667 
   2668 /// "string()" function
   2669 void f_string(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2670 {
   2671  rettv->v_type = VAR_STRING;
   2672  rettv->vval.v_string = encode_tv2string(&argvars[0], NULL);
   2673 }
   2674 
   2675 /// "strlen()" function
   2676 void f_strlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2677 {
   2678  rettv->vval.v_number = (varnumber_T)strlen(tv_get_string(&argvars[0]));
   2679 }
   2680 
   2681 static void strchar_common(typval_T *argvars, typval_T *rettv, bool skipcc)
   2682 {
   2683  const char *s = tv_get_string(&argvars[0]);
   2684  varnumber_T len = 0;
   2685  int (*func_mb_ptr2char_adv)(const char **pp);
   2686 
   2687  func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
   2688  while (*s != NUL) {
   2689    func_mb_ptr2char_adv(&s);
   2690    len++;
   2691  }
   2692  rettv->vval.v_number = len;
   2693 }
   2694 
   2695 /// "strcharlen()" function
   2696 void f_strcharlen(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2697 {
   2698  strchar_common(argvars, rettv, true);
   2699 }
   2700 
   2701 /// "strchars()" function
   2702 void f_strchars(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2703 {
   2704  varnumber_T skipcc = false;
   2705 
   2706  if (argvars[1].v_type != VAR_UNKNOWN) {
   2707    bool error = false;
   2708    skipcc = tv_get_bool_chk(&argvars[1], &error);
   2709    if (error) {
   2710      return;
   2711    }
   2712    if (skipcc < 0 || skipcc > 1) {
   2713      semsg(_(e_using_number_as_bool_nr), skipcc);
   2714      return;
   2715    }
   2716  }
   2717 
   2718  strchar_common(argvars, rettv, skipcc);
   2719 }
   2720 
   2721 /// "strutf16len()" function
   2722 void f_strutf16len(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2723 {
   2724  rettv->vval.v_number = -1;
   2725 
   2726  if (tv_check_for_string_arg(argvars, 0) == FAIL
   2727      || tv_check_for_opt_bool_arg(argvars, 1) == FAIL) {
   2728    return;
   2729  }
   2730 
   2731  varnumber_T countcc = false;
   2732  if (argvars[1].v_type != VAR_UNKNOWN) {
   2733    countcc = tv_get_bool(&argvars[1]);
   2734  }
   2735 
   2736  const char *s = tv_get_string(&argvars[0]);
   2737  varnumber_T len = 0;
   2738  int (*func_mb_ptr2char_adv)(const char **pp);
   2739 
   2740  func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
   2741  while (*s != NUL) {
   2742    const int ch = func_mb_ptr2char_adv(&s);
   2743    if (ch > 0xFFFF) {
   2744      len++;
   2745    }
   2746    len++;
   2747  }
   2748  rettv->vval.v_number = len;
   2749 }
   2750 
   2751 /// "strdisplaywidth()" function
   2752 void f_strdisplaywidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2753 {
   2754  const char *const s = tv_get_string(&argvars[0]);
   2755  int col = 0;
   2756 
   2757  if (argvars[1].v_type != VAR_UNKNOWN) {
   2758    col = (int)tv_get_number(&argvars[1]);
   2759  }
   2760 
   2761  rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, (char *)s) - col);
   2762 }
   2763 
   2764 /// "strwidth()" function
   2765 void f_strwidth(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2766 {
   2767  const char *const s = tv_get_string(&argvars[0]);
   2768 
   2769  rettv->vval.v_number = (varnumber_T)mb_string2cells(s);
   2770 }
   2771 
/// "strcharpart()" function
///
/// Returns in rettv a newly allocated copy of part of the string argvars[0].
/// argvars[1] is the start and the optional argvars[2] the length, both
/// counted in characters.  When argvars[2] and argvars[3] are both given,
/// argvars[3] ("skipcc") must evaluate to 0 or 1; when true, characters are
/// stepped over with utfc_ptr2len() instead of utf_ptr2len().  Only the part
/// of the requested range that overlaps the string is returned.
void f_strcharpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
  const char *const p = tv_get_string(&argvars[0]);
  const size_t slen = strlen(p);

  int nbyte = 0;  // byte offset of the start of the part
  varnumber_T skipcc = false;
  bool error = false;
  varnumber_T nchar = tv_get_number_chk(&argvars[1], &error);
  // When the start index failed conversion, nbyte stays 0 and processing
  // continues below.
  if (!error) {
    if (argvars[2].v_type != VAR_UNKNOWN
        && argvars[3].v_type != VAR_UNKNOWN) {
      skipcc = tv_get_bool_chk(&argvars[3], &error);
      if (error) {
        return;  // rettv is left untouched
      }
      if (skipcc < 0 || skipcc > 1) {
        semsg(_(e_using_number_as_bool_nr), skipcc);
        return;  // rettv is left untouched
      }
    }

    if (nchar > 0) {
      // Translate the character index into a byte offset, stopping at the end
      // of the string.
      while (nchar > 0 && (size_t)nbyte < slen) {
        if (skipcc) {
          nbyte += utfc_ptr2len(p + nbyte);
        } else {
          nbyte += utf_ptr2len(p + nbyte);
        }
        nchar--;
      }
    } else {
      // A zero or negative start is used directly as a (non-positive) offset;
      // it is clipped to the string further down.
      nbyte = (int)nchar;
    }
  }
  int len = 0;  // length of the part in bytes
  if (argvars[2].v_type != VAR_UNKNOWN) {
    int charlen = (int)tv_get_number(&argvars[2]);
    // Translate the character count into a byte count.
    while (charlen > 0 && nbyte + len < (int)slen) {
      int off = nbyte + len;

      if (off < 0) {
        // Positions before the start of the string count as one byte each.
        len += 1;
      } else {
        if (skipcc) {
          len += utfc_ptr2len(p + off);
        } else {
          len += utf_ptr2len(p + off);
        }
      }
      charlen--;
    }
  } else {
    len = (int)slen - nbyte;    // default: all bytes that are available.
  }

  // Only return the overlap between the specified part and the actual
  // string.
  if (nbyte < 0) {
    // Drop the bytes that lie before the start of the string.
    len += nbyte;
    nbyte = 0;
  } else if ((size_t)nbyte > slen) {
    nbyte = (int)slen;
  }
  if (len < 0) {
    len = 0;
  } else if (nbyte + len > (int)slen) {
    len = (int)slen - nbyte;
  }

  rettv->v_type = VAR_STRING;
  rettv->vval.v_string = xmemdupz(p + nbyte, (size_t)len);
}
   2846 
   2847 /// "strpart()" function
   2848 void f_strpart(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2849 {
   2850  bool error = false;
   2851 
   2852  const char *const p = tv_get_string(&argvars[0]);
   2853  const size_t slen = strlen(p);
   2854 
   2855  varnumber_T n = tv_get_number_chk(&argvars[1], &error);
   2856  varnumber_T len;
   2857  if (error) {
   2858    len = 0;
   2859  } else if (argvars[2].v_type != VAR_UNKNOWN) {
   2860    len = tv_get_number(&argvars[2]);
   2861  } else {
   2862    len = (varnumber_T)slen - n;  // Default len: all bytes that are available.
   2863  }
   2864 
   2865  // Only return the overlap between the specified part and the actual
   2866  // string.
   2867  if (n < 0) {
   2868    len += n;
   2869    n = 0;
   2870  } else if (n > (varnumber_T)slen) {
   2871    n = (varnumber_T)slen;
   2872  }
   2873  if (len < 0) {
   2874    len = 0;
   2875  } else if (n + len > (varnumber_T)slen) {
   2876    len = (varnumber_T)slen - n;
   2877  }
   2878 
   2879  if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN) {
   2880    int64_t off;
   2881 
   2882    // length in characters
   2883    for (off = n; off < (int64_t)slen && len > 0; len--) {
   2884      off += utfc_ptr2len(p + off);
   2885    }
   2886    len = off - n;
   2887  }
   2888 
   2889  rettv->v_type = VAR_STRING;
   2890  rettv->vval.v_string = xmemdupz(p + n, (size_t)len);
   2891 }
   2892 
   2893 /// "strridx()" function
   2894 void f_strridx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2895 {
   2896  char buf[NUMBUFLEN];
   2897  const char *const needle = tv_get_string_chk(&argvars[1]);
   2898  const char *const haystack = tv_get_string_buf_chk(&argvars[0], buf);
   2899 
   2900  rettv->vval.v_number = -1;
   2901  if (needle == NULL || haystack == NULL) {
   2902    return;  // Type error; errmsg already given.
   2903  }
   2904 
   2905  const size_t haystack_len = strlen(haystack);
   2906  ptrdiff_t end_idx;
   2907  if (argvars[2].v_type != VAR_UNKNOWN) {
   2908    // Third argument: upper limit for index.
   2909    end_idx = (ptrdiff_t)tv_get_number_chk(&argvars[2], NULL);
   2910    if (end_idx < 0) {
   2911      return;  // Can never find a match.
   2912    }
   2913  } else {
   2914    end_idx = (ptrdiff_t)haystack_len;
   2915  }
   2916 
   2917  const char *lastmatch = NULL;
   2918  if (*needle == NUL) {
   2919    // Empty string matches past the end.
   2920    lastmatch = haystack + end_idx;
   2921  } else {
   2922    for (const char *rest = haystack; *rest != NUL; rest++) {
   2923      rest = strstr(rest, needle);
   2924      if (rest == NULL || rest > haystack + end_idx) {
   2925        break;
   2926      }
   2927      lastmatch = rest;
   2928    }
   2929  }
   2930 
   2931  if (lastmatch != NULL) {
   2932    rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
   2933  }
   2934 }
   2935 
   2936 /// "strtrans()" function
   2937 void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2938 {
   2939  rettv->v_type = VAR_STRING;
   2940  rettv->vval.v_string = transstr(tv_get_string(&argvars[0]), true);
   2941 }
   2942 
   2943 /// "utf16idx()" function
   2944 ///
   2945 /// Converts a byte or character offset in a string to the corresponding UTF-16
   2946 /// code unit offset.
   2947 void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   2948 {
   2949  rettv->vval.v_number = -1;
   2950 
   2951  if (tv_check_for_string_arg(argvars, 0) == FAIL
   2952      || tv_check_for_opt_number_arg(argvars, 1) == FAIL
   2953      || tv_check_for_opt_bool_arg(argvars, 2) == FAIL
   2954      || (argvars[2].v_type != VAR_UNKNOWN
   2955          && tv_check_for_opt_bool_arg(argvars, 3) == FAIL)) {
   2956    return;
   2957  }
   2958 
   2959  const char *const str = tv_get_string_chk(&argvars[0]);
   2960  varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
   2961  if (str == NULL || idx < 0) {
   2962    return;
   2963  }
   2964 
   2965  varnumber_T countcc = false;
   2966  varnumber_T charidx = false;
   2967  if (argvars[2].v_type != VAR_UNKNOWN) {
   2968    countcc = tv_get_bool(&argvars[2]);
   2969    if (argvars[3].v_type != VAR_UNKNOWN) {
   2970      charidx = tv_get_bool(&argvars[3]);
   2971    }
   2972  }
   2973 
   2974  int (*ptr2len)(const char *);
   2975  if (countcc) {
   2976    ptr2len = utf_ptr2len;
   2977  } else {
   2978    ptr2len = utfc_ptr2len;
   2979  }
   2980 
   2981  const char *p;
   2982  int len;
   2983  int utf16idx = 0;
   2984  for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) {
   2985    if (*p == NUL) {
   2986      // If the index is exactly the number of bytes or characters in the
   2987      // string then return the length of the string in utf-16 code units.
   2988      if (charidx ? (idx == 0) : (p == (str + idx))) {
   2989        rettv->vval.v_number = len;
   2990      }
   2991      return;
   2992    }
   2993    utf16idx = len;
   2994    const int clen = ptr2len(p);
   2995    const int c = (clen > 1) ? utf_ptr2char(p) : *p;
   2996    if (c > 0xFFFF) {
   2997      len++;
   2998    }
   2999    p += ptr2len(p);
   3000    if (charidx) {
   3001      idx--;
   3002    }
   3003  }
   3004 
   3005  rettv->vval.v_number = utf16idx;
   3006 }
   3007 
   3008 /// "tolower(string)" function
   3009 void f_tolower(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   3010 {
   3011  rettv->v_type = VAR_STRING;
   3012  rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), false);
   3013 }
   3014 
   3015 /// "toupper(string)" function
   3016 void f_toupper(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   3017 {
   3018  rettv->v_type = VAR_STRING;
   3019  rettv->vval.v_string = strcase_save(tv_get_string(&argvars[0]), true);
   3020 }
   3021 
/// "tr(string, fromstr, tostr)" function
///
/// Returns in rettv a copy of the string argvars[0] in which every character
/// that occurs in fromstr (argvars[1]) is replaced by the character at the
/// same position in tostr (argvars[2]).  Characters are delimited with
/// utfc_ptr2len().  When fromstr or tostr fails conversion, or when the two
/// are found to hold a different number of characters, the returned string is
/// NULL; in the latter case e_invarg2 is reported with fromstr.
void f_tr(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
{
  char buf[NUMBUFLEN];
  char buf2[NUMBUFLEN];

  const char *in_str = tv_get_string(&argvars[0]);
  const char *fromstr = tv_get_string_buf_chk(&argvars[1], buf);
  const char *tostr = tv_get_string_buf_chk(&argvars[2], buf2);

  // Default return value: empty string.
  rettv->v_type = VAR_STRING;
  rettv->vval.v_string = NULL;
  if (fromstr == NULL || tostr == NULL) {
    return;  // Type error; errmsg already given.
  }
  // Growable byte buffer in which the result is assembled.
  garray_T ga;
  ga_init(&ga, (int)sizeof(char), 80);

  // fromstr and tostr have to contain the same number of chars.
  bool first = true;
  while (*in_str != NUL) {
    // By default the input character is copied unchanged.
    const char *cpstr = in_str;
    const int inlen = utfc_ptr2len(in_str);
    int cplen = inlen;
    // Position, in characters, reached while scanning fromstr.
    int idx = 0;
    int fromlen;
    for (const char *p = fromstr; *p != NUL; p += fromlen) {
      fromlen = utfc_ptr2len(p);
      if (fromlen == inlen && strncmp(in_str, p, (size_t)inlen) == 0) {
        // Found the input character in fromstr: pick the character at the
        // same position in tostr.  "p" is reused to walk tostr.
        int tolen;
        for (p = tostr; *p != NUL; p += tolen) {
          tolen = utfc_ptr2len(p);
          if (idx-- == 0) {
            cplen = tolen;
            cpstr = p;
            break;
          }
        }
        if (*p == NUL) {  // tostr is shorter than fromstr.
          goto error;
        }
        break;
      }
      idx++;
    }

    // cpstr still pointing at in_str means no replacement was selected above;
    // in that case the scan went through all of fromstr and idx holds its
    // number of characters.
    if (first && cpstr == in_str) {
      // Check that fromstr and tostr have the same number of
      // (multi-byte) characters.  Done only once when a character
      // of in_str doesn't appear in fromstr.
      first = false;
      int tolen;
      for (const char *p = tostr; *p != NUL; p += tolen) {
        tolen = utfc_ptr2len(p);
        idx--;
      }
      if (idx != 0) {
        goto error;
      }
    }

    // Append the selected character (original or replacement) to the result.
    ga_grow(&ga, cplen);
    memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
    ga.ga_len += cplen;

    in_str += inlen;
  }

  // add a terminating NUL
  ga_append(&ga, NUL);

  // The buffer of the growarray becomes the returned string.
  rettv->vval.v_string = ga.ga_data;
  return;
error:
  // The partial result is discarded; rettv keeps the NULL string set above.
  semsg(_(e_invarg2), fromstr);
  ga_clear(&ga);
}
   3100 
   3101 /// "trim({expr})" function
   3102 void f_trim(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
   3103 {
   3104  char buf1[NUMBUFLEN];
   3105  char buf2[NUMBUFLEN];
   3106  const char *head = tv_get_string_buf_chk(&argvars[0], buf1);
   3107  const char *mask = NULL;
   3108  const char *prev;
   3109  const char *p;
   3110  int dir = 0;
   3111 
   3112  rettv->v_type = VAR_STRING;
   3113  rettv->vval.v_string = NULL;
   3114  if (head == NULL) {
   3115    return;
   3116  }
   3117 
   3118  if (tv_check_for_opt_string_arg(argvars, 1) == FAIL) {
   3119    return;
   3120  }
   3121 
   3122  if (argvars[1].v_type == VAR_STRING) {
   3123    mask = tv_get_string_buf_chk(&argvars[1], buf2);
   3124    if (*mask == NUL) {
   3125      mask = NULL;
   3126    }
   3127 
   3128    if (argvars[2].v_type != VAR_UNKNOWN) {
   3129      bool error = false;
   3130      // leading or trailing characters to trim
   3131      dir = (int)tv_get_number_chk(&argvars[2], &error);
   3132      if (error) {
   3133        return;
   3134      }
   3135      if (dir < 0 || dir > 2) {
   3136        semsg(_(e_invarg2), tv_get_string(&argvars[2]));
   3137        return;
   3138      }
   3139    }
   3140  }
   3141 
   3142  if (dir == 0 || dir == 1) {
   3143    // Trim leading characters
   3144    while (*head != NUL) {
   3145      int c1 = utf_ptr2char(head);
   3146      if (mask == NULL) {
   3147        if (c1 > ' ' && c1 != 0xa0) {
   3148          break;
   3149        }
   3150      } else {
   3151        for (p = mask; *p != NUL; MB_PTR_ADV(p)) {
   3152          if (c1 == utf_ptr2char(p)) {
   3153            break;
   3154          }
   3155        }
   3156        if (*p == NUL) {
   3157          break;
   3158        }
   3159      }
   3160      MB_PTR_ADV(head);
   3161    }
   3162  }
   3163 
   3164  const char *tail = head + strlen(head);
   3165  if (dir == 0 || dir == 2) {
   3166    // Trim trailing characters
   3167    for (; tail > head; tail = prev) {
   3168      prev = tail;
   3169      MB_PTR_BACK(head, prev);
   3170      int c1 = utf_ptr2char(prev);
   3171      if (mask == NULL) {
   3172        if (c1 > ' ' && c1 != 0xa0) {
   3173          break;
   3174        }
   3175      } else {
   3176        for (p = mask; *p != NUL; MB_PTR_ADV(p)) {
   3177          if (c1 == utf_ptr2char(p)) {
   3178            break;
   3179          }
   3180        }
   3181        if (*p == NUL) {
   3182          break;
   3183        }
   3184      }
   3185    }
   3186  }
   3187  rettv->vval.v_string = xstrnsave(head, (size_t)(tail - head));
   3188 }
   3189 
   3190 /// compare two keyvalue_T structs by case sensitive value
   3191 int cmp_keyvalue_value(const void *a, const void *b)
   3192 {
   3193  keyvalue_T *kv1 = (keyvalue_T *)a;
   3194  keyvalue_T *kv2 = (keyvalue_T *)b;
   3195 
   3196  return strcmp(kv1->value, kv2->value);
   3197 }
   3198 
   3199 /// compare two keyvalue_T structs by value with length
   3200 int cmp_keyvalue_value_n(const void *a, const void *b)
   3201 {
   3202  keyvalue_T *kv1 = (keyvalue_T *)a;
   3203  keyvalue_T *kv2 = (keyvalue_T *)b;
   3204 
   3205  return strncmp(kv1->value, kv2->value, MAX(kv1->length, kv2->length));
   3206 }
   3207 
   3208 /// compare two keyvalue_T structs by case insensitive value
   3209 int cmp_keyvalue_value_i(const void *a, const void *b)
   3210 {
   3211  keyvalue_T *kv1 = (keyvalue_T *)a;
   3212  keyvalue_T *kv2 = (keyvalue_T *)b;
   3213 
   3214  return STRICMP(kv1->value, kv2->value);
   3215 }
   3216 
   3217 /// compare two keyvalue_T structs by case insensitive value with length
   3218 int cmp_keyvalue_value_ni(const void *a, const void *b)
   3219 {
   3220  keyvalue_T *kv1 = (keyvalue_T *)a;
   3221  keyvalue_T *kv2 = (keyvalue_T *)b;
   3222 
   3223  return STRNICMP(kv1->value, kv2->value, MAX(kv1->length, kv2->length));
   3224 }