neovim

Neovim text editor
git clone https://git.dasho.dev/neovim.git
Log | Files | Refs | README

charset.c (41711B)


      1 /// @file charset.c
      2 ///
      3 /// Code related to character sets.
      4 
      5 #include <assert.h>
      6 #include <errno.h>
      7 #include <inttypes.h>
      8 #include <limits.h>
      9 #include <stdlib.h>
     10 #include <string.h>
     11 #include <uv.h>
     12 
     13 #include "auto/config.h"
     14 #include "klib/kvec.h"
     15 #include "nvim/ascii_defs.h"
     16 #include "nvim/buffer_defs.h"
     17 #include "nvim/charset.h"
     18 #include "nvim/cursor.h"
     19 #include "nvim/eval/typval_defs.h"
     20 #include "nvim/garray.h"
     21 #include "nvim/garray_defs.h"
     22 #include "nvim/globals.h"
     23 #include "nvim/keycodes.h"
     24 #include "nvim/macros_defs.h"
     25 #include "nvim/mbyte.h"
     26 #include "nvim/memory.h"
     27 #include "nvim/option.h"
     28 #include "nvim/path.h"
     29 #include "nvim/pos_defs.h"
     30 #include "nvim/strings.h"
     31 #include "nvim/types_defs.h"
     32 #include "nvim/vim_defs.h"
     33 
     34 #include "charset.c.generated.h"
     35 
     36 static bool chartab_initialized = false;
     37 
     38 // b_chartab[] is an array with 256 bits, each bit representing one of the
     39 // characters 0-255.
     40 #define SET_CHARTAB(buf, c) \
     41  (buf)->b_chartab[(unsigned)(c) >> 6] |= (1ull << ((c) & 0x3f))
     42 #define RESET_CHARTAB(buf, c) \
     43  (buf)->b_chartab[(unsigned)(c) >> 6] &= ~(1ull << ((c) & 0x3f))
     44 #define GET_CHARTAB_TAB(chartab, c) \
     45  ((chartab)[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f)))
     46 
     47 // Table used below, see init_chartab() for an explanation
     48 static uint8_t g_chartab[256];
     49 
     50 // Flags for g_chartab[].
     51 #define CT_CELL_MASK  0x07  ///< mask: nr of display cells (1, 2 or 4)
     52 #define CT_PRINT_CHAR 0x10  ///< flag: set for printable chars
     53 #define CT_ID_CHAR    0x20  ///< flag: set for ID chars
     54 #define CT_FNAME_CHAR 0x40  ///< flag: set for file name chars
     55 
     56 /// Fill g_chartab[].  Also fills curbuf->b_chartab[] with flags for keyword
     57 /// characters for current buffer.
     58 ///
     59 /// Depends on the option settings 'iskeyword', 'isident', 'isfname',
     60 /// 'isprint' and 'encoding'.
     61 ///
     62 /// The index in g_chartab[] is the character when first byte is up to 0x80,
     63 /// if the first byte is 0x80 and above it depends on further bytes.
     64 ///
     65 /// The contents of g_chartab[]:
     66 /// - The lower two bits, masked by CT_CELL_MASK, give the number of display
     67 ///   cells the character occupies (1 or 2).  Not valid for UTF-8 above 0x80.
     68 /// - CT_PRINT_CHAR bit is set when the character is printable (no need to
     69 ///   translate the character before displaying it).  Note that only DBCS
     70 ///   characters can have 2 display cells and still be printable.
     71 /// - CT_FNAME_CHAR bit is set when the character can be in a file name.
     72 /// - CT_ID_CHAR bit is set when the character can be in an identifier.
     73 ///
     74 /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has
     75 /// an error, OK otherwise.
     76 int init_chartab(void)
     77 {
     78  return buf_init_chartab(curbuf, true);
     79 }
     80 
     81 /// Helper for init_chartab
     82 ///
     83 /// @param global false: only set buf->b_chartab[]
     84 ///
     85 /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has
     86 /// an error, OK otherwise.
     87 int buf_init_chartab(buf_T *buf, bool global)
     88 {
     89  if (global) {
     90    // Set the default size for printable characters:
     91    // From <Space> to '~' is 1 (printable), others are 2 (not printable).
     92    // This also inits all 'isident' and 'isfname' flags to false.
     93    int c = 0;
     94 
     95    while (c < ' ') {
     96      g_chartab[c++] = (dy_flags & kOptDyFlagUhex) ? 4 : 2;
     97    }
     98 
     99    while (c <= '~') {
    100      g_chartab[c++] = 1 + CT_PRINT_CHAR;
    101    }
    102 
    103    while (c < 256) {
    104      if (c >= 0xa0) {
    105        // UTF-8: bytes 0xa0 - 0xff are printable (latin1)
    106        // Also assume that every multi-byte char is a filename character.
    107        g_chartab[c++] = (CT_PRINT_CHAR | CT_FNAME_CHAR) + 1;
    108      } else {
    109        // the rest is unprintable by default
    110        g_chartab[c++] = (dy_flags & kOptDyFlagUhex) ? 4 : 2;
    111      }
    112    }
    113  }
    114 
    115  // Init word char flags all to false
    116  CLEAR_FIELD(buf->b_chartab);
    117 
    118  // In lisp mode the '-' character is included in keywords.
    119  if (buf->b_p_lisp) {
    120    SET_CHARTAB(buf, '-');
    121  }
    122 
    123  // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' options.
    124  for (int i = global ? 0 : 3; i <= 3; i++) {
    125    const char *p;
    126    if (i == 0) {
    127      // first round: 'isident'
    128      p = p_isi;
    129    } else if (i == 1) {
    130      // second round: 'isprint'
    131      p = p_isp;
    132    } else if (i == 2) {
    133      // third round: 'isfname'
    134      p = p_isf;
    135    } else {  // i == 3
    136      // fourth round: 'iskeyword'
    137      p = buf->b_p_isk;
    138    }
    139    if (parse_isopt(p, buf, false) == FAIL) {
    140      return FAIL;
    141    }
    142  }
    143 
    144  chartab_initialized = true;
    145  return OK;
    146 }
    147 
    148 /// Checks the format for the option settings 'iskeyword', 'isident', 'isfname'
    149 /// or 'isprint'.
    150 /// Returns FAIL if has an error, OK otherwise.
    151 int check_isopt(char *var)
    152 {
    153  return parse_isopt(var, NULL, true);
    154 }
    155 
    156 /// @param only_check  if false: refill g_chartab[]
    157 static int parse_isopt(const char *var, buf_T *buf, bool only_check)
    158 {
    159  const char *p = var;
    160 
    161  // Parses the 'isident', 'iskeyword', 'isfname' and 'isprint' options.
    162  // Each option is a list of characters, character numbers or ranges,
    163  // separated by commas, e.g.: "200-210,x,#-178,-"
    164  while (*p) {
    165    bool tilde = false;
    166    bool do_isalpha = false;
    167 
    168    if (*p == '^' && p[1] != NUL) {
    169      tilde = true;
    170      p++;
    171    }
    172 
    173    int c;
    174    if (ascii_isdigit(*p)) {
    175      c = getdigits_int((char **)&p, true, 0);
    176    } else {
    177      c = mb_ptr2char_adv(&p);
    178    }
    179    int c2 = -1;
    180 
    181    if (*p == '-' && p[1] != NUL) {
    182      p++;
    183 
    184      if (ascii_isdigit(*p)) {
    185        c2 = getdigits_int((char **)&p, true, 0);
    186      } else {
    187        c2 = mb_ptr2char_adv(&p);
    188      }
    189    }
    190 
    191    if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256
    192        || !(*p == NUL || *p == ',')) {
    193      return FAIL;
    194    }
    195 
    196    bool trail_comma = *p == ',';
    197    p = skip_to_option_part(p);
    198    if (trail_comma && *p == NUL) {
    199      // Trailing comma is not allowed.
    200      return FAIL;
    201    }
    202 
    203    if (only_check) {
    204      continue;
    205    }
    206 
    207    if (c2 == -1) {  // not a range
    208      // A single '@' (not "@-@"):
    209      // Decide on letters being ID/printable/keyword chars with
    210      // standard function isalpha(). This takes care of locale for
    211      // single-byte characters).
    212      if (c == '@') {
    213        do_isalpha = true;
    214        c = 1;
    215        c2 = 255;
    216      } else {
    217        c2 = c;
    218      }
    219    }
    220 
    221    while (c <= c2) {
    222      // Use the MB_ functions here, because isalpha() doesn't
    223      // work properly when 'encoding' is "latin1" and the locale is
    224      // "C".
    225      if (!do_isalpha
    226          || mb_islower(c)
    227          || mb_isupper(c)) {
    228        if (var == p_isi) {  // (re)set ID flag
    229          if (tilde) {
    230            g_chartab[c] &= (uint8_t) ~CT_ID_CHAR;
    231          } else {
    232            g_chartab[c] |= CT_ID_CHAR;
    233          }
    234        } else if (var == p_isp) {  // (re)set printable
    235          if (c < ' ' || c > '~') {
    236            if (tilde) {
    237              g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK)
    238                                       + ((dy_flags & kOptDyFlagUhex) ? 4 : 2));
    239              g_chartab[c] &= (uint8_t) ~CT_PRINT_CHAR;
    240            } else {
    241              g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) + 1);
    242              g_chartab[c] |= CT_PRINT_CHAR;
    243            }
    244          }
    245        } else if (var == p_isf) {  // (re)set fname flag
    246          if (tilde) {
    247            g_chartab[c] &= (uint8_t) ~CT_FNAME_CHAR;
    248          } else {
    249            g_chartab[c] |= CT_FNAME_CHAR;
    250          }
    251        } else {  // (var == p_isk || var == buf->b_p_isk) (re)set keyword flag
    252          if (tilde) {
    253            RESET_CHARTAB(buf, c);
    254          } else {
    255            SET_CHARTAB(buf, c);
    256          }
    257        }
    258      }
    259      c++;
    260    }
    261  }
    262 
    263  return OK;
    264 }
    265 
    266 /// Translate any special characters in buf[bufsize] in-place.
    267 ///
    268 /// The result is a string with only printable characters, but if there is not
    269 /// enough room, not all characters will be translated.
    270 ///
    271 /// @param buf
    272 /// @param bufsize
    273 void trans_characters(char *buf, int bufsize)
    274 {
    275  char *trs;                   // translated character
    276  int len = (int)strlen(buf);  // length of string needing translation
    277  int room = bufsize - len;    // room in buffer after string
    278 
    279  while (*buf != 0) {
    280    int trs_len;      // length of trs[]
    281    // Assume a multi-byte character doesn't need translation.
    282    if ((trs_len = utfc_ptr2len(buf)) > 1) {
    283      len -= trs_len;
    284    } else {
    285      trs = transchar_byte((uint8_t)(*buf));
    286      trs_len = (int)strlen(trs);
    287 
    288      if (trs_len > 1) {
    289        room -= trs_len - 1;
    290        if (room <= 0) {
    291          return;
    292        }
    293        memmove(buf + trs_len, buf + 1, (size_t)len);
    294      }
    295      memmove(buf, trs, (size_t)trs_len);
    296      len--;
    297    }
    298    buf += trs_len;
    299  }
    300 }
    301 
    302 /// Find length of a string capable of holding s with all specials replaced
    303 ///
    304 /// Assumes replacing special characters with printable ones just like
    305 /// strtrans() does.
    306 ///
    307 /// @param[in]  s  String to check.
    308 ///
    309 /// @return number of bytes needed to hold a translation of `s`, NUL byte not
    310 ///         included.
    311 size_t transstr_len(const char *const s, bool untab)
    312  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE
    313 {
    314  const char *p = s;
    315  size_t len = 0;
    316 
    317  while (*p) {
    318    const size_t l = (size_t)utfc_ptr2len(p);
    319    if (l > 1) {
    320      if (vim_isprintc(utf_ptr2char(p))) {
    321        len += l;
    322      } else {
    323        for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) {
    324          int c = utf_ptr2char(p + off);
    325          char hexbuf[9];
    326          len += transchar_hex(hexbuf, c);
    327        }
    328      }
    329      p += l;
    330    } else if (*p == TAB && !untab) {
    331      len += 1;
    332      p++;
    333    } else {
    334      const int b2c_l = byte2cells((uint8_t)(*p++));
    335      // Illegal byte sequence may occupy up to 4 characters.
    336      len += (size_t)(b2c_l > 0 ? b2c_l : 4);
    337    }
    338  }
    339  return len;
    340 }
    341 
    342 /// Replace special characters with printable ones
    343 ///
    344 /// @param[in]  s  String to replace characters from.
    345 /// @param[out]  buf  Buffer to which result should be saved.
    346 /// @param[in]  len  Buffer length. Resulting string may not occupy more then
    347 ///                  len - 1 bytes (one for trailing NUL byte).
    348 /// @param[in]  untab  remove tab characters
    349 ///
    350 /// @return length of the resulting string, without the NUL byte.
    351 size_t transstr_buf(const char *const s, const ssize_t slen, char *const buf, const size_t buflen,
    352                    bool untab)
    353  FUNC_ATTR_NONNULL_ALL
    354 {
    355  const char *p = s;
    356  char *buf_p = buf;
    357  char *const buf_e = buf_p + buflen - 1;
    358 
    359  while ((slen < 0 || (p - s) < slen) && *p != NUL && buf_p < buf_e) {
    360    const size_t l = (size_t)utfc_ptr2len(p);
    361    if (l > 1) {
    362      if (buf_p + l > buf_e) {
    363        break;  // Exceeded `buf` size.
    364      }
    365 
    366      if (vim_isprintc(utf_ptr2char(p))) {
    367        memmove(buf_p, p, l);
    368        buf_p += l;
    369      } else {
    370        for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) {
    371          int c = utf_ptr2char(p + off);
    372          char hexbuf[9];  // <up to 6 bytes>NUL
    373          const size_t hexlen = transchar_hex(hexbuf, c);
    374          if (buf_p + hexlen > buf_e) {
    375            break;
    376          }
    377          memmove(buf_p, hexbuf, hexlen);
    378          buf_p += hexlen;
    379        }
    380      }
    381      p += l;
    382    } else if (*p == TAB && !untab) {
    383      *buf_p++ = *p++;
    384    } else {
    385      const char *const tb = transchar_byte((uint8_t)(*p++));
    386      const size_t tb_len = strlen(tb);
    387      if (buf_p + tb_len > buf_e) {
    388        break;  // Exceeded `buf` size.
    389      }
    390      memmove(buf_p, tb, tb_len);
    391      buf_p += tb_len;
    392    }
    393  }
    394  *buf_p = NUL;
    395  assert(buf_p <= buf_e);
    396  return (size_t)(buf_p - buf);
    397 }
    398 
    399 /// Copy string and replace special characters with printable characters
    400 ///
    401 /// Works like `strtrans()` does, used for that and in some other places.
    402 ///
    403 /// @param[in]  s  String to replace characters from.
    404 ///
    405 /// @return [allocated] translated string
    406 char *transstr(const char *const s, bool untab)
    407  FUNC_ATTR_NONNULL_RET
    408 {
    409  // Compute the length of the result, taking account of unprintable
    410  // multi-byte characters.
    411  const size_t len = transstr_len(s, untab) + 1;
    412  char *const buf = xmalloc(len);
    413  transstr_buf(s, -1, buf, len, untab);
    414  return buf;
    415 }
    416 
    417 size_t kv_transstr(StringBuilder *str, const char *const s, bool untab)
    418  FUNC_ATTR_NONNULL_ARG(1)
    419 {
    420  if (!s) {
    421    return 0;
    422  }
    423 
    424  // Compute the length of the result, taking account of unprintable
    425  // multi-byte characters.
    426  const size_t len = transstr_len(s, untab);
    427  kv_ensure_space(*str, len + 1);
    428  transstr_buf(s, -1, str->items + str->size, len + 1, untab);
    429  str->size += len;  // do not include NUL byte
    430  return len;
    431 }
    432 
    433 /// Convert the string "str[orglen]" to do ignore-case comparing.
    434 /// Use the current locale.
    435 ///
    436 /// When "buf" is NULL, return an allocated string.
    437 /// Otherwise, put the result in buf, limited by buflen, and return buf.
    438 char *str_foldcase(char *str, int orglen, char *buf, int buflen)
    439  FUNC_ATTR_NONNULL_RET
    440 {
    441  garray_T ga;
    442  int len = orglen;
    443 
    444 #define GA_CHAR(i) ((char *)ga.ga_data)[i]
    445 #define GA_PTR(i) ((char *)ga.ga_data + (i))
    446 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i])
    447 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + (i))
    448 
    449  // Copy "str" into "buf" or allocated memory, unmodified.
    450  if (buf == NULL) {
    451    ga_init(&ga, 1, 10);
    452 
    453    ga_grow(&ga, len + 1);
    454    memmove(ga.ga_data, str, (size_t)len);
    455    ga.ga_len = len;
    456  } else {
    457    if (len >= buflen) {
    458      // Ugly!
    459      len = buflen - 1;
    460    }
    461    memmove(buf, str, (size_t)len);
    462  }
    463 
    464  if (buf == NULL) {
    465    GA_CHAR(len) = NUL;
    466  } else {
    467    buf[len] = NUL;
    468  }
    469 
    470  // Make each character lower case.
    471  int i = 0;
    472  while (STR_CHAR(i) != NUL) {
    473    int c = utf_ptr2char(STR_PTR(i));
    474    int olen = utf_ptr2len(STR_PTR(i));
    475    int lc = mb_tolower(c);
    476 
    477    // Only replace the character when it is not an invalid
    478    // sequence (ASCII character or more than one byte) and
    479    // mb_tolower() doesn't return the original character.
    480    if (((c < 0x80) || (olen > 1)) && (c != lc)) {
    481      int nlen = utf_char2len(lc);
    482 
    483      // If the byte length changes need to shift the following
    484      // characters forward or backward.
    485      if (olen != nlen) {
    486        if (nlen > olen) {
    487          if (buf == NULL) {
    488            ga_grow(&ga, nlen - olen + 1);
    489          } else {
    490            if (len + nlen - olen >= buflen) {
    491              // out of memory, keep old char
    492              lc = c;
    493              nlen = olen;
    494            }
    495          }
    496        }
    497 
    498        if (olen != nlen) {
    499          if (buf == NULL) {
    500            STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen);
    501            ga.ga_len += nlen - olen;
    502          } else {
    503            STRMOVE(buf + i + nlen, buf + i + olen);
    504            len += nlen - olen;
    505          }
    506        }
    507      }
    508      utf_char2bytes(lc, STR_PTR(i));
    509    }
    510 
    511    // skip to next multi-byte char
    512    i += utfc_ptr2len(STR_PTR(i));
    513  }
    514 
    515  if (buf == NULL) {
    516    return ga.ga_data;
    517  }
    518  return buf;
    519 }
    520 
    521 // Catch 22: g_chartab[] can't be initialized before the options are
    522 // initialized, and initializing options may cause transchar() to be called!
    523 // When chartab_initialized == false don't use g_chartab[].
    524 // Does NOT work for multi-byte characters, c must be <= 255.
    525 // Also doesn't work for the first byte of a multi-byte, "c" must be a
    526 // character!
    527 static uint8_t transchar_charbuf[11];
    528 
    529 /// Translate a character into a printable one, leaving printable ASCII intact
    530 ///
    531 /// All unicode characters are considered non-printable in this function.
    532 ///
    533 /// @param[in]  c  Character to translate.
    534 ///
    535 /// @return translated character into a static buffer.
    536 char *transchar(int c)
    537 {
    538  return transchar_buf(curbuf, c);
    539 }
    540 
    541 char *transchar_buf(const buf_T *buf, int c)
    542 {
    543  int i = 0;
    544  if (IS_SPECIAL(c)) {
    545    // special key code, display as ~@ char
    546    transchar_charbuf[0] = '~';
    547    transchar_charbuf[1] = '@';
    548    i = 2;
    549    c = K_SECOND(c);
    550  }
    551 
    552  if ((!chartab_initialized && (c >= ' ' && c <= '~'))
    553      || ((c <= 0xFF) && vim_isprintc(c))) {
    554    // printable character
    555    transchar_charbuf[i] = (uint8_t)c;
    556    transchar_charbuf[i + 1] = NUL;
    557  } else if (c <= 0xFF) {
    558    transchar_nonprint(buf, (char *)transchar_charbuf + i, c);
    559  } else {
    560    transchar_hex((char *)transchar_charbuf + i, c);
    561  }
    562  return (char *)transchar_charbuf;
    563 }
    564 
    565 /// Like transchar(), but called with a byte instead of a character.
    566 ///
    567 /// Checks for an illegal UTF-8 byte.  Uses 'fileformat' of the current buffer.
    568 ///
    569 /// @param[in]  c  Byte to translate.
    570 ///
    571 /// @return pointer to translated character in transchar_charbuf.
    572 char *transchar_byte(const int c)
    573  FUNC_ATTR_WARN_UNUSED_RESULT
    574 {
    575  return transchar_byte_buf(curbuf, c);
    576 }
    577 
    578 /// Like transchar_buf(), but called with a byte instead of a character.
    579 ///
    580 /// Checks for an illegal UTF-8 byte.  Uses 'fileformat' of "buf", unless it is NULL.
    581 ///
    582 /// @param[in]  c  Byte to translate.
    583 ///
    584 /// @return pointer to translated character in transchar_charbuf.
    585 char *transchar_byte_buf(const buf_T *buf, const int c)
    586  FUNC_ATTR_WARN_UNUSED_RESULT
    587 {
    588  if (c >= 0x80) {
    589    transchar_nonprint(buf, (char *)transchar_charbuf, c);
    590    return (char *)transchar_charbuf;
    591  }
    592  return transchar_buf(buf, c);
    593 }
    594 
    595 /// Convert non-printable characters to 2..4 printable ones
    596 ///
    597 /// @warning Does not work for multi-byte characters, c must be <= 255.
    598 ///
    599 /// @param[in]  buf  Required to check the file format
    600 /// @param[out]  charbuf  Buffer to store result in, must be able to hold
    601 ///                       at least 5 bytes (conversion result + NUL).
    602 /// @param[in]  c  Character to convert. NUL is assumed to be NL according to
    603 ///                `:h NL-used-for-NUL`.
    604 void transchar_nonprint(const buf_T *buf, char *charbuf, int c)
    605 {
    606  if (c == NL) {
    607    // we use newline in place of a NUL
    608    c = NUL;
    609  } else if (buf != NULL && c == CAR && get_fileformat(buf) == EOL_MAC) {
    610    // we use CR in place of  NL in this case
    611    c = NL;
    612  }
    613  assert(c <= 0xff);
    614 
    615  if (dy_flags & kOptDyFlagUhex || c > 0x7f) {
    616    // 'display' has "uhex"
    617    transchar_hex(charbuf, c);
    618  } else {
    619    // 0x00 - 0x1f and 0x7f
    620    charbuf[0] = '^';
    621    // DEL displayed as ^?
    622    charbuf[1] = (char)(uint8_t)(c ^ 0x40);
    623 
    624    charbuf[2] = NUL;
    625  }
    626 }
    627 
    628 /// Convert a non-printable character to hex C string like "<FFFF>"
    629 ///
    630 /// @param[out]  buf  Buffer to store result in.
    631 /// @param[in]  c  Character to convert.
    632 ///
    633 /// @return Number of bytes stored in buffer, excluding trailing NUL byte.
    634 size_t transchar_hex(char *const buf, const int c)
    635  FUNC_ATTR_NONNULL_ALL
    636 {
    637  size_t i = 0;
    638 
    639  buf[i++] = '<';
    640  if (c > 0xFF) {
    641    if (c > 0xFFFF) {
    642      buf[i++] = (char)nr2hex((unsigned)c >> 20);
    643      buf[i++] = (char)nr2hex((unsigned)c >> 16);
    644    }
    645    buf[i++] = (char)nr2hex((unsigned)c >> 12);
    646    buf[i++] = (char)nr2hex((unsigned)c >> 8);
    647  }
    648  buf[i++] = (char)(nr2hex((unsigned)c >> 4));
    649  buf[i++] = (char)(nr2hex((unsigned)c));
    650  buf[i++] = '>';
    651  buf[i] = NUL;
    652  return i;
    653 }
    654 
    655 /// Mirror text "str" for right-left displaying.
    656 /// Only works for single-byte characters (e.g., numbers).
    657 void rl_mirror_ascii(char *str, char *end)
    658 {
    659  for (char *p1 = str, *p2 = (end ? end : str + strlen(str)) - 1; p1 < p2; p1++, p2--) {
    660    char t = *p1;
    661    *p1 = *p2;
    662    *p2 = t;
    663  }
    664 }
    665 
    666 /// Convert the lower 4 bits of byte "c" to its hex character
    667 ///
    668 /// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or
    669 /// function key 1.
    670 ///
    671 /// @param[in]  n  Number to convert.
    672 ///
    673 /// @return the hex character.
    674 static inline unsigned nr2hex(unsigned n)
    675  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
    676 {
    677  if ((n & 0xf) <= 9) {
    678    return (n & 0xf) + '0';
    679  }
    680  return (n & 0xf) - 10 + 'a';
    681 }
    682 
    683 /// Return number of display cells occupied by byte "b".
    684 ///
    685 /// Caller must make sure 0 <= b <= 255.
    686 /// For multi-byte mode "b" must be the first byte of a character.
    687 /// A TAB is counted as two cells: "^I".
    688 /// This will return 0 for bytes >= 0x80, because the number of
    689 /// cells depends on further bytes in UTF-8.
    690 ///
    691 /// @param b
    692 ///
    693 /// @return Number of display cells.
    694 int byte2cells(int b)
    695  FUNC_ATTR_PURE
    696 {
    697  if (b >= 0x80) {
    698    return 0;
    699  }
    700  return g_chartab[b] & CT_CELL_MASK;
    701 }
    702 
    703 /// Return number of display cells occupied by character "c".
    704 ///
    705 /// "c" can be a special key (negative number) in which case 3 or 4 is returned.
    706 /// A TAB is counted as two cells: "^I" or four: "<09>".
    707 ///
    708 /// @param c
    709 ///
    710 /// @return Number of display cells.
    711 int char2cells(int c)
    712 {
    713  if (IS_SPECIAL(c)) {
    714    return char2cells(K_SECOND(c)) + 2;
    715  }
    716 
    717  if (c >= 0x80) {
    718    // UTF-8: above 0x80 need to check the value
    719    return utf_char2cells(c);
    720  }
    721  return g_chartab[c & 0xff] & CT_CELL_MASK;
    722 }
    723 
    724 /// Return number of display cells occupied by character at "*p".
    725 /// A TAB is counted as two cells: "^I" or four: "<09>".
    726 ///
    727 /// @param p
    728 ///
    729 /// @return number of display cells.
    730 int ptr2cells(const char *p_in)
    731 {
    732  uint8_t *p = (uint8_t *)p_in;
    733  // For UTF-8 we need to look at more bytes if the first byte is >= 0x80.
    734  if (*p >= 0x80) {
    735    return utf_ptr2cells(p_in);
    736  }
    737 
    738  // For DBCS we can tell the cell count from the first byte.
    739  return g_chartab[*p] & CT_CELL_MASK;
    740 }
    741 
    742 /// Return the number of character cells string "s" will take on the screen,
    743 /// counting TABs as two characters: "^I".
    744 ///
    745 /// 's' must be non-null.
    746 ///
    747 /// @param s
    748 ///
    749 /// @return number of character cells.
    750 int vim_strsize(const char *s)
    751 {
    752  return vim_strnsize(s, MAXCOL);
    753 }
    754 
    755 /// Return the number of character cells string "s[len]" will take on the
    756 /// screen, counting TABs as two characters: "^I".
    757 ///
    758 /// 's' must be non-null.
    759 ///
    760 /// @param s
    761 /// @param len
    762 ///
    763 /// @return Number of character cells.
    764 int vim_strnsize(const char *s, int len)
    765 {
    766  assert(s != NULL);
    767  int size = 0;
    768  while (*s != NUL && --len >= 0) {
    769    int l = utfc_ptr2len(s);
    770    size += ptr2cells(s);
    771    s += l;
    772    len -= l - 1;
    773  }
    774  return size;
    775 }
    776 
    777 /// Check that "c" is a normal identifier character:
    778 /// Letters and characters from the 'isident' option.
    779 ///
    780 /// @param  c  character to check
    781 bool vim_isIDc(int c)
    782  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    783 {
    784  return c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR);
    785 }
    786 
    787 /// Check that "c" is a keyword character:
    788 /// Letters and characters from 'iskeyword' option for the current buffer.
    789 /// For multi-byte characters mb_get_class() is used (builtin rules).
    790 ///
    791 /// @param  c  character to check
    792 bool vim_iswordc(const int c)
    793  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    794 {
    795  return vim_iswordc_buf(c, curbuf);
    796 }
    797 
    798 /// Check that "c" is a keyword character
    799 /// Letters and characters from 'iskeyword' option for given buffer.
    800 /// For multi-byte characters mb_get_class() is used (builtin rules).
    801 ///
    802 /// @param[in]  c  Character to check.
    803 /// @param[in]  chartab  Buffer chartab.
    804 bool vim_iswordc_tab(const int c, const uint64_t *const chartab)
    805  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    806 {
    807  return (c >= 0x100
    808          ? (utf_class_tab(c, chartab) >= 2)
    809          : (c > 0 && GET_CHARTAB_TAB(chartab, c) != 0));
    810 }
    811 
    812 /// Check that "c" is a keyword character:
    813 /// Letters and characters from 'iskeyword' option for given buffer.
    814 /// For multi-byte characters mb_get_class() is used (builtin rules).
    815 ///
    816 /// @param  c    character to check
    817 /// @param  buf  buffer whose keywords to use
    818 bool vim_iswordc_buf(const int c, buf_T *const buf)
    819  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(2)
    820 {
    821  return vim_iswordc_tab(c, buf->b_chartab);
    822 }
    823 
    824 /// Just like vim_iswordc() but uses a pointer to the (multi-byte) character.
    825 ///
    826 /// @param  p  pointer to the multi-byte character
    827 ///
    828 /// @return true if "p" points to a keyword character.
    829 bool vim_iswordp(const char *const p)
    830  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    831 {
    832  return vim_iswordp_buf(p, curbuf);
    833 }
    834 
    835 /// Just like vim_iswordc_buf() but uses a pointer to the (multi-byte)
    836 /// character.
    837 ///
    838 /// @param  p    pointer to the multi-byte character
    839 /// @param  buf  buffer whose keywords to use
    840 ///
    841 /// @return true if "p" points to a keyword character.
    842 bool vim_iswordp_buf(const char *const p, buf_T *const buf)
    843  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    844 {
    845  int c = (uint8_t)(*p);
    846 
    847  if (MB_BYTE2LEN(c) > 1) {
    848    c = utf_ptr2char(p);
    849  }
    850  return vim_iswordc_buf(c, buf);
    851 }
    852 
    853 /// Check that "c" is a valid file-name character as specified with the
    854 /// 'isfname' option.
    855 /// Assume characters above 0x100 are valid (multi-byte).
    856 /// To be used for commands like "gf".
    857 ///
    858 /// @param  c  character to check
    859 bool vim_isfilec(int c)
    860  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    861 {
    862  return c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR));
    863 }
    864 
    865 /// Check if "c" is a valid file-name character, including characters left
    866 /// out of 'isfname' to make "gf" work, such as ',', ' ', '@', ':', etc.
    867 bool vim_is_fname_char(int c)
    868  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    869 {
    870  return vim_isfilec(c) || c == ',' || c == ' ' || c == '@' || c == ':';
    871 }
    872 
    873 /// Check that "c" is a valid file-name character or a wildcard character
    874 /// Assume characters above 0x100 are valid (multi-byte).
    875 /// Explicitly interpret ']' as a wildcard character as path_has_wildcard("]")
    876 /// returns false.
    877 ///
    878 /// @param  c  character to check
    879 bool vim_isfilec_or_wc(int c)
    880  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    881 {
    882  char buf[2];
    883  buf[0] = (char)c;
    884  buf[1] = NUL;
    885  return vim_isfilec(c) || c == ']' || path_has_wildcard(buf);
    886 }
    887 
    888 /// Check that "c" is a printable character.
    889 ///
    890 /// @param  c  character to check
    891 bool vim_isprintc(int c)
    892  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
    893 {
    894  if (c >= 0x100) {
    895    return utf_printable(c);
    896  }
    897  return c > 0 && (g_chartab[c] & CT_PRINT_CHAR);
    898 }
    899 
    900 /// skipwhite: skip over ' ' and '\t'.
    901 ///
    902 /// @param[in]  p  String to skip in.
    903 ///
    904 /// @return Pointer to character after the skipped whitespace.
    905 char *skipwhite(const char *p)
    906  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    907  FUNC_ATTR_NONNULL_RET
    908 {
    909  while (ascii_iswhite(*p)) {
    910    p++;
    911  }
    912  return (char *)p;
    913 }
    914 
    915 /// Like `skipwhite`, but skip up to `len` characters.
    916 /// @see skipwhite
    917 ///
    918 /// @param[in]  p    String to skip in.
    919 /// @param[in]  len  Max length to skip.
    920 ///
    921 /// @return Pointer to character after the skipped whitespace, or the `len`-th
    922 ///         character in the string.
    923 char *skipwhite_len(const char *p, size_t len)
    924  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    925  FUNC_ATTR_NONNULL_RET
    926 {
    927  for (; len > 0 && ascii_iswhite(*p); len--) {
    928    p++;
    929  }
    930  return (char *)p;
    931 }
    932 
    933 // getwhitecols: return the number of whitespace
    934 // columns (bytes) at the start of a given line
    935 intptr_t getwhitecols_curline(void)
    936 {
    937  return getwhitecols(get_cursor_line_ptr());
    938 }
    939 
    940 intptr_t getwhitecols(const char *p)
    941  FUNC_ATTR_PURE
    942 {
    943  return skipwhite(p) - p;
    944 }
    945 
    946 /// Skip over digits
    947 ///
    948 /// @param[in]  q  String to skip digits in.
    949 ///
    950 /// @return Pointer to the character after the skipped digits.
    951 char *skipdigits(const char *q)
    952  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
    953  FUNC_ATTR_NONNULL_RET
    954 {
    955  const char *p = q;
    956  while (ascii_isdigit(*p)) {
    957    // skip to next non-digit
    958    p++;
    959  }
    960  return (char *)p;
    961 }
    962 
    963 /// skip over binary digits
    964 ///
    965 /// @param q pointer to string
    966 ///
    967 /// @return Pointer to the character after the skipped digits.
    968 const char *skipbin(const char *q)
    969  FUNC_ATTR_PURE
    970  FUNC_ATTR_NONNULL_ALL
    971  FUNC_ATTR_NONNULL_RET
    972 {
    973  const char *p = q;
    974  while (ascii_isbdigit(*p)) {
    975    // skip to next non-digit
    976    p++;
    977  }
    978  return p;
    979 }
    980 
    981 /// skip over digits and hex characters
    982 ///
    983 /// @param q
    984 ///
    985 /// @return Pointer to the character after the skipped digits and hex
    986 ///         characters.
    987 char *skiphex(char *q)
    988  FUNC_ATTR_PURE
    989 {
    990  char *p = q;
    991  while (ascii_isxdigit(*p)) {
    992    // skip to next non-digit
    993    p++;
    994  }
    995  return p;
    996 }
    997 
    998 /// skip to digit (or NUL after the string)
    999 ///
   1000 /// @param q
   1001 ///
   1002 /// @return Pointer to the digit or (NUL after the string).
   1003 char *skiptodigit(char *q)
   1004  FUNC_ATTR_PURE
   1005 {
   1006  char *p = q;
   1007  while (*p != NUL && !ascii_isdigit(*p)) {
   1008    // skip to next digit
   1009    p++;
   1010  }
   1011  return p;
   1012 }
   1013 
   1014 /// skip to binary character (or NUL after the string)
   1015 ///
   1016 /// @param q pointer to string
   1017 ///
   1018 /// @return Pointer to the binary character or (NUL after the string).
   1019 const char *skiptobin(const char *q)
   1020  FUNC_ATTR_PURE
   1021  FUNC_ATTR_NONNULL_ALL
   1022  FUNC_ATTR_NONNULL_RET
   1023 {
   1024  const char *p = q;
   1025  while (*p != NUL && !ascii_isbdigit(*p)) {
   1026    // skip to next digit
   1027    p++;
   1028  }
   1029  return p;
   1030 }
   1031 
   1032 /// skip to hex character (or NUL after the string)
   1033 ///
   1034 /// @param q
   1035 ///
   1036 /// @return Pointer to the hex character or (NUL after the string).
   1037 char *skiptohex(char *q)
   1038  FUNC_ATTR_PURE
   1039 {
   1040  char *p = q;
   1041  while (*p != NUL && !ascii_isxdigit(*p)) {
   1042    // skip to next digit
   1043    p++;
   1044  }
   1045  return p;
   1046 }
   1047 
   1048 /// Skip over text until ' ' or '\t' or NUL
   1049 ///
   1050 /// @param[in]  p  Text to skip over.
   1051 ///
   1052 /// @return Pointer to the next whitespace or NUL character.
   1053 char *skiptowhite(const char *p)
   1054  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
   1055 {
   1056  while (*p != ' ' && *p != '\t' && *p != NUL) {
   1057    p++;
   1058  }
   1059  return (char *)p;
   1060 }
   1061 
   1062 /// skiptowhite_esc: Like skiptowhite(), but also skip escaped chars
   1063 ///
   1064 /// @param p
   1065 ///
   1066 /// @return Pointer to the next whitespace character.
   1067 char *skiptowhite_esc(const char *p)
   1068  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
   1069 {
   1070  while (*p != ' ' && *p != '\t' && *p != NUL) {
   1071    if (((*p == '\\') || (*p == Ctrl_V)) && (*(p + 1) != NUL)) {
   1072      p++;
   1073    }
   1074    p++;
   1075  }
   1076  return (char *)p;
   1077 }
   1078 
   1079 /// Skip over text until '\n' or NUL.
   1080 ///
   1081 /// @param[in]  p  Text to skip over.
   1082 ///
   1083 /// @return Pointer to the next '\n' or NUL character.
   1084 char *skip_to_newline(const char *const p)
   1085  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
   1086  FUNC_ATTR_NONNULL_RET
   1087 {
   1088  return xstrchrnul(p, NL);
   1089 }
   1090 
   1091 /// Gets a number from a string and skips over it, signalling overflow.
   1092 ///
   1093 /// @param[out]  pp  A pointer to a pointer to char.
   1094 ///                  It will be advanced past the read number.
   1095 /// @param[out]  nr  Number read from the string.
   1096 ///
   1097 /// @return true on success, false on error/overflow
   1098 bool try_getdigits(char **pp, intmax_t *nr)
   1099 {
   1100  errno = 0;
   1101  *nr = strtoimax(*pp, pp, 10);
   1102  if (errno == ERANGE && (*nr == INTMAX_MIN || *nr == INTMAX_MAX)) {
   1103    return false;
   1104  }
   1105  return true;
   1106 }
   1107 
   1108 /// Gets a number from a string and skips over it.
   1109 ///
   1110 /// @param[out]  pp  Pointer to a pointer to char.
   1111 ///                  It will be advanced past the read number.
   1112 /// @param strict    Abort on overflow.
   1113 /// @param def       Default value, if parsing fails or overflow occurs.
   1114 ///
   1115 /// @return Number read from the string, or `def` on parse failure or overflow.
   1116 intmax_t getdigits(char **pp, bool strict, intmax_t def)
   1117 {
   1118  intmax_t number;
   1119  int ok = try_getdigits(pp, &number);
   1120  if (strict && !ok) {
   1121    abort();
   1122  }
   1123  return ok ? number : def;
   1124 }
   1125 
   1126 /// Gets an int number from a string.
   1127 ///
   1128 /// @see getdigits
   1129 int getdigits_int(char **pp, bool strict, int def)
   1130 {
   1131  intmax_t number = getdigits(pp, strict, def);
   1132 #if SIZEOF_INTMAX_T > SIZEOF_INT
   1133  if (strict) {
   1134    assert(number >= INT_MIN && number <= INT_MAX);
   1135  } else if (!(number >= INT_MIN && number <= INT_MAX)) {
   1136    return def;
   1137  }
   1138 #endif
   1139  return (int)number;
   1140 }
   1141 
   1142 /// Gets a long number from a string.
   1143 ///
   1144 /// @see getdigits
   1145 long getdigits_long(char **pp, bool strict, long def)
   1146 {
   1147  intmax_t number = getdigits(pp, strict, def);
   1148 #if SIZEOF_INTMAX_T > SIZEOF_LONG
   1149  if (strict) {
   1150    assert(number >= LONG_MIN && number <= LONG_MAX);
   1151  } else if (!(number >= LONG_MIN && number <= LONG_MAX)) {
   1152    return def;
   1153  }
   1154 #endif
   1155  return (long)number;
   1156 }
   1157 
   1158 /// Gets a int32_t number from a string.
   1159 ///
   1160 /// @see getdigits
   1161 int32_t getdigits_int32(char **pp, bool strict, int32_t def)
   1162 {
   1163  intmax_t number = getdigits(pp, strict, def);
   1164 #if SIZEOF_INTMAX_T > 4
   1165  if (strict) {
   1166    assert(number >= INT32_MIN && number <= INT32_MAX);
   1167  } else if (!(number >= INT32_MIN && number <= INT32_MAX)) {
   1168    return def;
   1169  }
   1170 #endif
   1171  return (int32_t)number;
   1172 }
   1173 
   1174 /// Check that "lbuf" is empty or only contains blanks.
   1175 ///
   1176 /// @param  lbuf  line buffer to check
   1177 bool vim_isblankline(char *lbuf)
   1178  FUNC_ATTR_PURE
   1179 {
   1180  char *p = skipwhite(lbuf);
   1181  return *p == NUL || *p == '\r' || *p == '\n';
   1182 }
   1183 
   1184 /// Convert a string into a varnumber_T and/or uvarnumber_T, taking care of
   1185 /// hexadecimal, octal and binary numbers.  Accepts a '-' sign.
   1186 /// If "prep" is not NULL, returns a flag to indicate the type of the number:
   1187 ///   0      decimal
   1188 ///   '0'    octal
   1189 ///   'O'    octal
   1190 ///   'o'    octal
   1191 ///   'B'    bin
   1192 ///   'b'    bin
   1193 ///   'X'    hex
   1194 ///   'x'    hex
   1195 /// If "len" is not NULL, the length of the number in characters is returned.
/// The length is measured from "start", so it includes a leading '-' and any
/// skipped prefix.
   1196 /// If "nptr" is not NULL, the signed result is returned in it.
   1197 /// If "unptr" is not NULL, the unsigned result is returned in it.
   1198 /// If "what" contains STR2NR_BIN recognize binary numbers.
   1199 /// If "what" contains STR2NR_OCT recognize octal numbers.
   1200 /// If "what" contains STR2NR_HEX recognize hex numbers.
   1201 /// If "what" contains STR2NR_FORCE always assume bin/oct/hex.
   1202 /// If "what" contains STR2NR_QUOTE ignore embedded single quotes.
   1203 /// If maxlen > 0, check at a maximum maxlen chars.
   1204 /// If strict is true, check the number strictly: on failure "*len" stays 0.
   1205 ///
   1206 /// @param start String to convert; parsing begins at its first character.
   1207 /// @param prep Returns guessed type of number 0 = decimal, 'x' or 'X' is
   1208 ///             hexadecimal, '0', 'o' or 'O' is octal, 'b' or 'B' is binary.
   1209 ///             Always zero when STR2NR_FORCE is used.
   1210 /// @param len Returns the detected length of number.
   1211 /// @param what Recognizes what number passed, @see ChStr2NrFlags.
   1212 /// @param nptr Returns the signed result.
   1213 /// @param unptr Returns the unsigned result.
   1214 /// @param maxlen Max length of string to check.
   1215 /// @param strict If true, fail if the number has unexpected trailing
   1216 ///               alphanumeric chars: *len is set to 0 and nothing else is
   1217 ///               returned.
   1218 /// @param overflow When not NULL, set to true for overflow.
///                 This function never sets it to false.
   1219 void vim_str2nr(const char *const start, int *const prep, int *const len, const int what,
   1220                varnumber_T *const nptr, uvarnumber_T *const unptr, const int maxlen,
   1221                const bool strict, bool *const overflow)
   1222  FUNC_ATTR_NONNULL_ARG(1)
   1223 {
   1224  const char *ptr = start;
// True when "ptr" is "maxlen" or more bytes past "start"; always false when
// maxlen is 0 (no limit).
   1225 #define STRING_ENDED(ptr) \
   1226  (!(maxlen == 0 || (int)((ptr) - start) < maxlen))
   1227  int pre = 0;  // default is decimal
   1228  const bool negative = (ptr[0] == '-');
   1229  uvarnumber_T un = 0;
   1230 
   1231  if (len != NULL) {
   1232    *len = 0;
   1233  }
   1234 
   1235  if (negative) {
   1236    ptr++;
   1237  }
   1238 
   1239  if (what & STR2NR_FORCE) {
   1240    // When forcing, the main consideration is skipping the prefix. Decimal
   1241    // numbers have no prefixes to skip. "pre" is not set.
   1242    switch (what & ~(STR2NR_FORCE | STR2NR_QUOTE)) {
   1243    case STR2NR_HEX:
   1244      if (!STRING_ENDED(ptr + 2)
   1245          && ptr[0] == '0'
   1246          && (ptr[1] == 'x' || ptr[1] == 'X')
   1247          && ascii_isxdigit(ptr[2])) {
   1248        ptr += 2;
   1249      }
   1250      goto vim_str2nr_hex;
   1251    case STR2NR_BIN:
   1252      if (!STRING_ENDED(ptr + 2)
   1253          && ptr[0] == '0'
   1254          && (ptr[1] == 'b' || ptr[1] == 'B')
   1255          && ascii_isbdigit(ptr[2])) {
   1256        ptr += 2;
   1257      }
   1258      goto vim_str2nr_bin;
   1259    // Make STR2NR_OOCT work the same as STR2NR_OCT when forcing.
   1260    case STR2NR_OCT:
   1261    case STR2NR_OOCT:
   1262    case STR2NR_OCT | STR2NR_OOCT:
   1263      if (!STRING_ENDED(ptr + 2)
   1264          && ptr[0] == '0'
   1265          && (ptr[1] == 'o' || ptr[1] == 'O')
   1266          && ascii_isodigit(ptr[2])) {
   1267        ptr += 2;
   1268      }
   1269      goto vim_str2nr_oct;
   1270    case 0:
   1271      goto vim_str2nr_dec;
   1272    default:
   1273      abort();
   1274    }
   1275  } else if ((what & (STR2NR_HEX | STR2NR_OCT | STR2NR_OOCT | STR2NR_BIN))
   1276             && !STRING_ENDED(ptr + 1) && ptr[0] == '0' && ptr[1] != '8'
   1277             && ptr[1] != '9') {
   1278    pre = (uint8_t)ptr[1];
   1279    // Detect hexadecimal: 0x or 0X followed by hex digit.
   1280    if ((what & STR2NR_HEX)
   1281        && !STRING_ENDED(ptr + 2)
   1282        && (pre == 'X' || pre == 'x')
   1283        && ascii_isxdigit(ptr[2])) {
   1284      ptr += 2;
   1285      goto vim_str2nr_hex;
   1286    }
   1287    // Detect binary: 0b or 0B followed by 0 or 1.
   1288    if ((what & STR2NR_BIN)
   1289        && !STRING_ENDED(ptr + 2)
   1290        && (pre == 'B' || pre == 'b')
   1291        && ascii_isbdigit(ptr[2])) {
   1292      ptr += 2;
   1293      goto vim_str2nr_bin;
   1294    }
   1295    // Detect octal: 0o or 0O followed by octal digits (without '8' or '9').
   1296    if ((what & STR2NR_OOCT)
   1297        && !STRING_ENDED(ptr + 2)
   1298        && (pre == 'O' || pre == 'o')
   1299        && ascii_isodigit(ptr[2])) {
   1300      ptr += 2;
   1301      goto vim_str2nr_oct;
   1302    }
   1303    // Detect old octal format: 0 followed by octal digits.
   1304    pre = 0;
   1305    if (!(what & STR2NR_OCT)
   1306        || !ascii_isodigit(ptr[1])) {
   1307      goto vim_str2nr_dec;
   1308    }
    // Any later digit above '7' means the number is decimal after all.
   1309    for (int i = 2; !STRING_ENDED(ptr + i) && ascii_isdigit(ptr[i]); i++) {
   1310      if (ptr[i] > '7') {
   1311        goto vim_str2nr_dec;
   1312      }
   1313    }
   1314    pre = '0';
   1315    goto vim_str2nr_oct;
   1316  } else {
   1317    goto vim_str2nr_dec;
   1318  }
   1319 
   1320  // Do the conversion manually to avoid sscanf() quirks.
   1321  abort();  // Should’ve used goto earlier.
// PARSE_NUMBER accumulates digits into "un" for the given base: "cond" tests
// whether *ptr is a digit and "conv" yields its value.  With STR2NR_QUOTE a
// single quote that comes after the first character and is directly followed
// by a digit is skipped.  On overflow "un" is pinned to UVARNUMBER_MAX and
// "*overflow" (when given) is set; scanning continues to the end of the
// digits.
   1322 #define PARSE_NUMBER(base, cond, conv) \
   1323  do { \
   1324    const char *const after_prefix = ptr; \
   1325    while (!STRING_ENDED(ptr)) { \
   1326      if ((what & STR2NR_QUOTE) && ptr > after_prefix && *ptr == '\'') { \
   1327        ptr++; \
   1328        if (!STRING_ENDED(ptr) && (cond)) { \
   1329          continue; \
   1330        } \
   1331        ptr--; \
   1332      } \
   1333      if (!(cond)) { \
   1334        break; \
   1335      } \
   1336      const uvarnumber_T digit = (uvarnumber_T)(conv); \
   1337      /* avoid ubsan error for overflow */ \
   1338      if (un < UVARNUMBER_MAX / (base) \
   1339          || (un == UVARNUMBER_MAX / (base) \
   1340              && ((base) != 10 || digit <= UVARNUMBER_MAX % 10))) { \
   1341        un = (base) * un + digit; \
   1342      } else { \
   1343        un = UVARNUMBER_MAX; \
   1344        if (overflow != NULL) { \
   1345          *overflow = true; \
   1346        } \
   1347      } \
   1348      ptr++; \
   1349    } \
   1350  } while (0)
   1351 vim_str2nr_bin:
   1352  PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0'));
   1353  goto vim_str2nr_proceed;
   1354 vim_str2nr_oct:
   1355  PARSE_NUMBER(8, (ascii_isodigit(*ptr)), (*ptr - '0'));
   1356  goto vim_str2nr_proceed;
   1357 vim_str2nr_dec:
   1358  PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0'));
   1359  goto vim_str2nr_proceed;
   1360 vim_str2nr_hex:
   1361  PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr)));
   1362  goto vim_str2nr_proceed;
   1363 #undef PARSE_NUMBER
   1364 
   1365 vim_str2nr_proceed:
   1366  // Check for an alphanumeric character immediately following, that is
   1367  // most likely a typo.  Skipped when parsing stopped exactly at "maxlen".
   1368  if (strict && ptr - start != maxlen && ASCII_ISALNUM(*ptr)) {
   1369    return;
   1370  }
   1371 
   1372  if (prep != NULL) {
   1373    *prep = pre;
   1374  }
   1375 
   1376  if (len != NULL) {
   1377    *len = (int)(ptr - start);
   1378  }
   1379 
   1380  if (nptr != NULL) {
   1381    if (negative) {  // apply the leading '-', whatever the base
   1382      // avoid ubsan error for overflow
   1383      if (un > VARNUMBER_MAX) {
   1384        *nptr = VARNUMBER_MIN;
   1385        if (overflow != NULL) {
   1386          *overflow = true;
   1387        }
   1388      } else {
   1389        *nptr = -(varnumber_T)un;
   1390      }
   1391    } else {
   1392      if (un > VARNUMBER_MAX) {
   1393        un = VARNUMBER_MAX;
   1394        if (overflow != NULL) {
   1395          *overflow = true;
   1396        }
   1397      }
   1398      *nptr = (varnumber_T)un;
   1399    }
   1400  }
   1401 
  // NOTE: for a non-negative number "un" may have been clamped to
  // VARNUMBER_MAX above, but only when "nptr" was given.
   1402  if (unptr != NULL) {
   1403    *unptr = un;
   1404  }
   1405 #undef STRING_ENDED
   1406 }
   1407 
   1408 /// Return the value of a single hex character.
   1409 /// Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
   1410 ///
   1411 /// @param c
   1412 ///
   1413 /// @return The value of the hex character.
   1414 int hex2nr(int c)
   1415  FUNC_ATTR_CONST
   1416 {
   1417  if ((c >= 'a') && (c <= 'f')) {
   1418    return c - 'a' + 10;
   1419  }
   1420 
   1421  if ((c >= 'A') && (c <= 'F')) {
   1422    return c - 'A' + 10;
   1423  }
   1424  return c - '0';
   1425 }
   1426 
   1427 /// Convert two hex characters to a byte.
   1428 ///
   1429 /// @return  -1 if one of the characters is not hex.
   1430 int hexhex2nr(const char *p)
   1431  FUNC_ATTR_PURE
   1432 {
   1433  if (!ascii_isxdigit(p[0]) || !ascii_isxdigit(p[1])) {
   1434    return -1;
   1435  }
   1436  return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
   1437 }
   1438 
   1439 /// Check that "str" starts with a backslash that should be removed.
   1440 /// For Windows this is only done when the character after the
   1441 /// backslash is not a normal file name character.
   1442 /// '$' is a valid file name character, we don't remove the backslash before
   1443 /// it.  This means it is not possible to use an environment variable after a
   1444 /// backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
   1445 /// Although "\ name" is valid, the backslash in "Program\ files" must be
   1446 /// removed.  Assume a file name doesn't start with a space.
   1447 /// For multi-byte names, never remove a backslash before a non-ascii
   1448 /// character, assume that all multi-byte characters are valid file name
   1449 /// characters.
   1450 ///
   1451 /// @param  str  file path string to check
   1452 bool rem_backslash(const char *str)
   1453  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
   1454 {
   1455 #ifdef BACKSLASH_IN_FILENAME
   1456  return str[0] == '\\'
   1457         && (uint8_t)str[1] < 0x80
   1458         && (str[1] == ' '
   1459             || (str[1] != NUL
   1460                 && str[1] != '*'
   1461                 && str[1] != '?'
   1462                 && !vim_isfilec((uint8_t)str[1])));
   1463 
   1464 #else
   1465  return str[0] == '\\' && str[1] != NUL;
   1466 #endif
   1467 }
   1468 
   1469 /// Halve the number of backslashes in a file name argument.
   1470 ///
   1471 /// @param p
   1472 void backslash_halve(char *p)
   1473 {
   1474  for (; *p && !rem_backslash(p); p++) {}
   1475  if (*p != NUL) {
   1476    char *dst = p;
   1477    goto start;
   1478    while (*p != NUL) {
   1479      if (rem_backslash(p)) {
   1480 start:
   1481        *dst++ = *(p + 1);
   1482        p += 2;
   1483      } else {
   1484        *dst++ = *p++;
   1485      }
   1486    }
   1487    *dst = NUL;
   1488  }
   1489 }
   1490 
   1491 /// backslash_halve() plus save the result in allocated memory.
   1492 ///
   1493 /// @param p
   1494 ///
   1495 /// @return String with the number of backslashes halved.
   1496 char *backslash_halve_save(const char *p)
   1497  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET
   1498 {
   1499  char *res = xmalloc(strlen(p) + 1);
   1500  char *dst = res;
   1501  while (*p != NUL) {
   1502    if (rem_backslash(p)) {
   1503      *dst++ = *(p + 1);
   1504      p += 2;
   1505    } else {
   1506      *dst++ = *p++;
   1507    }
   1508  }
   1509  *dst = NUL;
   1510  return res;
   1511 }