charset.c (41711B)
1 /// @file charset.c 2 /// 3 /// Code related to character sets. 4 5 #include <assert.h> 6 #include <errno.h> 7 #include <inttypes.h> 8 #include <limits.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <uv.h> 12 13 #include "auto/config.h" 14 #include "klib/kvec.h" 15 #include "nvim/ascii_defs.h" 16 #include "nvim/buffer_defs.h" 17 #include "nvim/charset.h" 18 #include "nvim/cursor.h" 19 #include "nvim/eval/typval_defs.h" 20 #include "nvim/garray.h" 21 #include "nvim/garray_defs.h" 22 #include "nvim/globals.h" 23 #include "nvim/keycodes.h" 24 #include "nvim/macros_defs.h" 25 #include "nvim/mbyte.h" 26 #include "nvim/memory.h" 27 #include "nvim/option.h" 28 #include "nvim/path.h" 29 #include "nvim/pos_defs.h" 30 #include "nvim/strings.h" 31 #include "nvim/types_defs.h" 32 #include "nvim/vim_defs.h" 33 34 #include "charset.c.generated.h" 35 36 static bool chartab_initialized = false; 37 38 // b_chartab[] is an array with 256 bits, each bit representing one of the 39 // characters 0-255. 40 #define SET_CHARTAB(buf, c) \ 41 (buf)->b_chartab[(unsigned)(c) >> 6] |= (1ull << ((c) & 0x3f)) 42 #define RESET_CHARTAB(buf, c) \ 43 (buf)->b_chartab[(unsigned)(c) >> 6] &= ~(1ull << ((c) & 0x3f)) 44 #define GET_CHARTAB_TAB(chartab, c) \ 45 ((chartab)[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f))) 46 47 // Table used below, see init_chartab() for an explanation 48 static uint8_t g_chartab[256]; 49 50 // Flags for g_chartab[]. 51 #define CT_CELL_MASK 0x07 ///< mask: nr of display cells (1, 2 or 4) 52 #define CT_PRINT_CHAR 0x10 ///< flag: set for printable chars 53 #define CT_ID_CHAR 0x20 ///< flag: set for ID chars 54 #define CT_FNAME_CHAR 0x40 ///< flag: set for file name chars 55 56 /// Fill g_chartab[]. Also fills curbuf->b_chartab[] with flags for keyword 57 /// characters for current buffer. 58 /// 59 /// Depends on the option settings 'iskeyword', 'isident', 'isfname', 60 /// 'isprint' and 'encoding'. 
61 /// 62 /// The index in g_chartab[] is the character when first byte is up to 0x80, 63 /// if the first byte is 0x80 and above it depends on further bytes. 64 /// 65 /// The contents of g_chartab[]: 66 /// - The lower two bits, masked by CT_CELL_MASK, give the number of display 67 /// cells the character occupies (1 or 2). Not valid for UTF-8 above 0x80. 68 /// - CT_PRINT_CHAR bit is set when the character is printable (no need to 69 /// translate the character before displaying it). Note that only DBCS 70 /// characters can have 2 display cells and still be printable. 71 /// - CT_FNAME_CHAR bit is set when the character can be in a file name. 72 /// - CT_ID_CHAR bit is set when the character can be in an identifier. 73 /// 74 /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has 75 /// an error, OK otherwise. 76 int init_chartab(void) 77 { 78 return buf_init_chartab(curbuf, true); 79 } 80 81 /// Helper for init_chartab 82 /// 83 /// @param global false: only set buf->b_chartab[] 84 /// 85 /// @return FAIL if 'iskeyword', 'isident', 'isfname' or 'isprint' option has 86 /// an error, OK otherwise. 87 int buf_init_chartab(buf_T *buf, bool global) 88 { 89 if (global) { 90 // Set the default size for printable characters: 91 // From <Space> to '~' is 1 (printable), others are 2 (not printable). 92 // This also inits all 'isident' and 'isfname' flags to false. 93 int c = 0; 94 95 while (c < ' ') { 96 g_chartab[c++] = (dy_flags & kOptDyFlagUhex) ? 4 : 2; 97 } 98 99 while (c <= '~') { 100 g_chartab[c++] = 1 + CT_PRINT_CHAR; 101 } 102 103 while (c < 256) { 104 if (c >= 0xa0) { 105 // UTF-8: bytes 0xa0 - 0xff are printable (latin1) 106 // Also assume that every multi-byte char is a filename character. 107 g_chartab[c++] = (CT_PRINT_CHAR | CT_FNAME_CHAR) + 1; 108 } else { 109 // the rest is unprintable by default 110 g_chartab[c++] = (dy_flags & kOptDyFlagUhex) ? 
4 : 2; 111 } 112 } 113 } 114 115 // Init word char flags all to false 116 CLEAR_FIELD(buf->b_chartab); 117 118 // In lisp mode the '-' character is included in keywords. 119 if (buf->b_p_lisp) { 120 SET_CHARTAB(buf, '-'); 121 } 122 123 // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' options. 124 for (int i = global ? 0 : 3; i <= 3; i++) { 125 const char *p; 126 if (i == 0) { 127 // first round: 'isident' 128 p = p_isi; 129 } else if (i == 1) { 130 // second round: 'isprint' 131 p = p_isp; 132 } else if (i == 2) { 133 // third round: 'isfname' 134 p = p_isf; 135 } else { // i == 3 136 // fourth round: 'iskeyword' 137 p = buf->b_p_isk; 138 } 139 if (parse_isopt(p, buf, false) == FAIL) { 140 return FAIL; 141 } 142 } 143 144 chartab_initialized = true; 145 return OK; 146 } 147 148 /// Checks the format for the option settings 'iskeyword', 'isident', 'isfname' 149 /// or 'isprint'. 150 /// Returns FAIL if has an error, OK otherwise. 151 int check_isopt(char *var) 152 { 153 return parse_isopt(var, NULL, true); 154 } 155 156 /// @param only_check if false: refill g_chartab[] 157 static int parse_isopt(const char *var, buf_T *buf, bool only_check) 158 { 159 const char *p = var; 160 161 // Parses the 'isident', 'iskeyword', 'isfname' and 'isprint' options. 
162 // Each option is a list of characters, character numbers or ranges, 163 // separated by commas, e.g.: "200-210,x,#-178,-" 164 while (*p) { 165 bool tilde = false; 166 bool do_isalpha = false; 167 168 if (*p == '^' && p[1] != NUL) { 169 tilde = true; 170 p++; 171 } 172 173 int c; 174 if (ascii_isdigit(*p)) { 175 c = getdigits_int((char **)&p, true, 0); 176 } else { 177 c = mb_ptr2char_adv(&p); 178 } 179 int c2 = -1; 180 181 if (*p == '-' && p[1] != NUL) { 182 p++; 183 184 if (ascii_isdigit(*p)) { 185 c2 = getdigits_int((char **)&p, true, 0); 186 } else { 187 c2 = mb_ptr2char_adv(&p); 188 } 189 } 190 191 if (c <= 0 || c >= 256 || (c2 < c && c2 != -1) || c2 >= 256 192 || !(*p == NUL || *p == ',')) { 193 return FAIL; 194 } 195 196 bool trail_comma = *p == ','; 197 p = skip_to_option_part(p); 198 if (trail_comma && *p == NUL) { 199 // Trailing comma is not allowed. 200 return FAIL; 201 } 202 203 if (only_check) { 204 continue; 205 } 206 207 if (c2 == -1) { // not a range 208 // A single '@' (not "@-@"): 209 // Decide on letters being ID/printable/keyword chars with 210 // standard function isalpha(). This takes care of locale for 211 // single-byte characters). 212 if (c == '@') { 213 do_isalpha = true; 214 c = 1; 215 c2 = 255; 216 } else { 217 c2 = c; 218 } 219 } 220 221 while (c <= c2) { 222 // Use the MB_ functions here, because isalpha() doesn't 223 // work properly when 'encoding' is "latin1" and the locale is 224 // "C". 225 if (!do_isalpha 226 || mb_islower(c) 227 || mb_isupper(c)) { 228 if (var == p_isi) { // (re)set ID flag 229 if (tilde) { 230 g_chartab[c] &= (uint8_t) ~CT_ID_CHAR; 231 } else { 232 g_chartab[c] |= CT_ID_CHAR; 233 } 234 } else if (var == p_isp) { // (re)set printable 235 if (c < ' ' || c > '~') { 236 if (tilde) { 237 g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) 238 + ((dy_flags & kOptDyFlagUhex) ? 
4 : 2)); 239 g_chartab[c] &= (uint8_t) ~CT_PRINT_CHAR; 240 } else { 241 g_chartab[c] = (uint8_t)((g_chartab[c] & ~CT_CELL_MASK) + 1); 242 g_chartab[c] |= CT_PRINT_CHAR; 243 } 244 } 245 } else if (var == p_isf) { // (re)set fname flag 246 if (tilde) { 247 g_chartab[c] &= (uint8_t) ~CT_FNAME_CHAR; 248 } else { 249 g_chartab[c] |= CT_FNAME_CHAR; 250 } 251 } else { // (var == p_isk || var == buf->b_p_isk) (re)set keyword flag 252 if (tilde) { 253 RESET_CHARTAB(buf, c); 254 } else { 255 SET_CHARTAB(buf, c); 256 } 257 } 258 } 259 c++; 260 } 261 } 262 263 return OK; 264 } 265 266 /// Translate any special characters in buf[bufsize] in-place. 267 /// 268 /// The result is a string with only printable characters, but if there is not 269 /// enough room, not all characters will be translated. 270 /// 271 /// @param buf 272 /// @param bufsize 273 void trans_characters(char *buf, int bufsize) 274 { 275 char *trs; // translated character 276 int len = (int)strlen(buf); // length of string needing translation 277 int room = bufsize - len; // room in buffer after string 278 279 while (*buf != 0) { 280 int trs_len; // length of trs[] 281 // Assume a multi-byte character doesn't need translation. 282 if ((trs_len = utfc_ptr2len(buf)) > 1) { 283 len -= trs_len; 284 } else { 285 trs = transchar_byte((uint8_t)(*buf)); 286 trs_len = (int)strlen(trs); 287 288 if (trs_len > 1) { 289 room -= trs_len - 1; 290 if (room <= 0) { 291 return; 292 } 293 memmove(buf + trs_len, buf + 1, (size_t)len); 294 } 295 memmove(buf, trs, (size_t)trs_len); 296 len--; 297 } 298 buf += trs_len; 299 } 300 } 301 302 /// Find length of a string capable of holding s with all specials replaced 303 /// 304 /// Assumes replacing special characters with printable ones just like 305 /// strtrans() does. 306 /// 307 /// @param[in] s String to check. 308 /// 309 /// @return number of bytes needed to hold a translation of `s`, NUL byte not 310 /// included. 
311 size_t transstr_len(const char *const s, bool untab) 312 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE 313 { 314 const char *p = s; 315 size_t len = 0; 316 317 while (*p) { 318 const size_t l = (size_t)utfc_ptr2len(p); 319 if (l > 1) { 320 if (vim_isprintc(utf_ptr2char(p))) { 321 len += l; 322 } else { 323 for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) { 324 int c = utf_ptr2char(p + off); 325 char hexbuf[9]; 326 len += transchar_hex(hexbuf, c); 327 } 328 } 329 p += l; 330 } else if (*p == TAB && !untab) { 331 len += 1; 332 p++; 333 } else { 334 const int b2c_l = byte2cells((uint8_t)(*p++)); 335 // Illegal byte sequence may occupy up to 4 characters. 336 len += (size_t)(b2c_l > 0 ? b2c_l : 4); 337 } 338 } 339 return len; 340 } 341 342 /// Replace special characters with printable ones 343 /// 344 /// @param[in] s String to replace characters from. 345 /// @param[out] buf Buffer to which result should be saved. 346 /// @param[in] len Buffer length. Resulting string may not occupy more then 347 /// len - 1 bytes (one for trailing NUL byte). 348 /// @param[in] untab remove tab characters 349 /// 350 /// @return length of the resulting string, without the NUL byte. 351 size_t transstr_buf(const char *const s, const ssize_t slen, char *const buf, const size_t buflen, 352 bool untab) 353 FUNC_ATTR_NONNULL_ALL 354 { 355 const char *p = s; 356 char *buf_p = buf; 357 char *const buf_e = buf_p + buflen - 1; 358 359 while ((slen < 0 || (p - s) < slen) && *p != NUL && buf_p < buf_e) { 360 const size_t l = (size_t)utfc_ptr2len(p); 361 if (l > 1) { 362 if (buf_p + l > buf_e) { 363 break; // Exceeded `buf` size. 
364 } 365 366 if (vim_isprintc(utf_ptr2char(p))) { 367 memmove(buf_p, p, l); 368 buf_p += l; 369 } else { 370 for (size_t off = 0; off < l; off += (size_t)utf_ptr2len(p + off)) { 371 int c = utf_ptr2char(p + off); 372 char hexbuf[9]; // <up to 6 bytes>NUL 373 const size_t hexlen = transchar_hex(hexbuf, c); 374 if (buf_p + hexlen > buf_e) { 375 break; 376 } 377 memmove(buf_p, hexbuf, hexlen); 378 buf_p += hexlen; 379 } 380 } 381 p += l; 382 } else if (*p == TAB && !untab) { 383 *buf_p++ = *p++; 384 } else { 385 const char *const tb = transchar_byte((uint8_t)(*p++)); 386 const size_t tb_len = strlen(tb); 387 if (buf_p + tb_len > buf_e) { 388 break; // Exceeded `buf` size. 389 } 390 memmove(buf_p, tb, tb_len); 391 buf_p += tb_len; 392 } 393 } 394 *buf_p = NUL; 395 assert(buf_p <= buf_e); 396 return (size_t)(buf_p - buf); 397 } 398 399 /// Copy string and replace special characters with printable characters 400 /// 401 /// Works like `strtrans()` does, used for that and in some other places. 402 /// 403 /// @param[in] s String to replace characters from. 404 /// 405 /// @return [allocated] translated string 406 char *transstr(const char *const s, bool untab) 407 FUNC_ATTR_NONNULL_RET 408 { 409 // Compute the length of the result, taking account of unprintable 410 // multi-byte characters. 411 const size_t len = transstr_len(s, untab) + 1; 412 char *const buf = xmalloc(len); 413 transstr_buf(s, -1, buf, len, untab); 414 return buf; 415 } 416 417 size_t kv_transstr(StringBuilder *str, const char *const s, bool untab) 418 FUNC_ATTR_NONNULL_ARG(1) 419 { 420 if (!s) { 421 return 0; 422 } 423 424 // Compute the length of the result, taking account of unprintable 425 // multi-byte characters. 
426 const size_t len = transstr_len(s, untab); 427 kv_ensure_space(*str, len + 1); 428 transstr_buf(s, -1, str->items + str->size, len + 1, untab); 429 str->size += len; // do not include NUL byte 430 return len; 431 } 432 433 /// Convert the string "str[orglen]" to do ignore-case comparing. 434 /// Use the current locale. 435 /// 436 /// When "buf" is NULL, return an allocated string. 437 /// Otherwise, put the result in buf, limited by buflen, and return buf. 438 char *str_foldcase(char *str, int orglen, char *buf, int buflen) 439 FUNC_ATTR_NONNULL_RET 440 { 441 garray_T ga; 442 int len = orglen; 443 444 #define GA_CHAR(i) ((char *)ga.ga_data)[i] 445 #define GA_PTR(i) ((char *)ga.ga_data + (i)) 446 #define STR_CHAR(i) (buf == NULL ? GA_CHAR(i) : buf[i]) 447 #define STR_PTR(i) (buf == NULL ? GA_PTR(i) : buf + (i)) 448 449 // Copy "str" into "buf" or allocated memory, unmodified. 450 if (buf == NULL) { 451 ga_init(&ga, 1, 10); 452 453 ga_grow(&ga, len + 1); 454 memmove(ga.ga_data, str, (size_t)len); 455 ga.ga_len = len; 456 } else { 457 if (len >= buflen) { 458 // Ugly! 459 len = buflen - 1; 460 } 461 memmove(buf, str, (size_t)len); 462 } 463 464 if (buf == NULL) { 465 GA_CHAR(len) = NUL; 466 } else { 467 buf[len] = NUL; 468 } 469 470 // Make each character lower case. 471 int i = 0; 472 while (STR_CHAR(i) != NUL) { 473 int c = utf_ptr2char(STR_PTR(i)); 474 int olen = utf_ptr2len(STR_PTR(i)); 475 int lc = mb_tolower(c); 476 477 // Only replace the character when it is not an invalid 478 // sequence (ASCII character or more than one byte) and 479 // mb_tolower() doesn't return the original character. 480 if (((c < 0x80) || (olen > 1)) && (c != lc)) { 481 int nlen = utf_char2len(lc); 482 483 // If the byte length changes need to shift the following 484 // characters forward or backward. 
485 if (olen != nlen) { 486 if (nlen > olen) { 487 if (buf == NULL) { 488 ga_grow(&ga, nlen - olen + 1); 489 } else { 490 if (len + nlen - olen >= buflen) { 491 // out of memory, keep old char 492 lc = c; 493 nlen = olen; 494 } 495 } 496 } 497 498 if (olen != nlen) { 499 if (buf == NULL) { 500 STRMOVE(GA_PTR(i) + nlen, GA_PTR(i) + olen); 501 ga.ga_len += nlen - olen; 502 } else { 503 STRMOVE(buf + i + nlen, buf + i + olen); 504 len += nlen - olen; 505 } 506 } 507 } 508 utf_char2bytes(lc, STR_PTR(i)); 509 } 510 511 // skip to next multi-byte char 512 i += utfc_ptr2len(STR_PTR(i)); 513 } 514 515 if (buf == NULL) { 516 return ga.ga_data; 517 } 518 return buf; 519 } 520 521 // Catch 22: g_chartab[] can't be initialized before the options are 522 // initialized, and initializing options may cause transchar() to be called! 523 // When chartab_initialized == false don't use g_chartab[]. 524 // Does NOT work for multi-byte characters, c must be <= 255. 525 // Also doesn't work for the first byte of a multi-byte, "c" must be a 526 // character! 527 static uint8_t transchar_charbuf[11]; 528 529 /// Translate a character into a printable one, leaving printable ASCII intact 530 /// 531 /// All unicode characters are considered non-printable in this function. 532 /// 533 /// @param[in] c Character to translate. 534 /// 535 /// @return translated character into a static buffer. 
536 char *transchar(int c) 537 { 538 return transchar_buf(curbuf, c); 539 } 540 541 char *transchar_buf(const buf_T *buf, int c) 542 { 543 int i = 0; 544 if (IS_SPECIAL(c)) { 545 // special key code, display as ~@ char 546 transchar_charbuf[0] = '~'; 547 transchar_charbuf[1] = '@'; 548 i = 2; 549 c = K_SECOND(c); 550 } 551 552 if ((!chartab_initialized && (c >= ' ' && c <= '~')) 553 || ((c <= 0xFF) && vim_isprintc(c))) { 554 // printable character 555 transchar_charbuf[i] = (uint8_t)c; 556 transchar_charbuf[i + 1] = NUL; 557 } else if (c <= 0xFF) { 558 transchar_nonprint(buf, (char *)transchar_charbuf + i, c); 559 } else { 560 transchar_hex((char *)transchar_charbuf + i, c); 561 } 562 return (char *)transchar_charbuf; 563 } 564 565 /// Like transchar(), but called with a byte instead of a character. 566 /// 567 /// Checks for an illegal UTF-8 byte. Uses 'fileformat' of the current buffer. 568 /// 569 /// @param[in] c Byte to translate. 570 /// 571 /// @return pointer to translated character in transchar_charbuf. 572 char *transchar_byte(const int c) 573 FUNC_ATTR_WARN_UNUSED_RESULT 574 { 575 return transchar_byte_buf(curbuf, c); 576 } 577 578 /// Like transchar_buf(), but called with a byte instead of a character. 579 /// 580 /// Checks for an illegal UTF-8 byte. Uses 'fileformat' of "buf", unless it is NULL. 581 /// 582 /// @param[in] c Byte to translate. 583 /// 584 /// @return pointer to translated character in transchar_charbuf. 585 char *transchar_byte_buf(const buf_T *buf, const int c) 586 FUNC_ATTR_WARN_UNUSED_RESULT 587 { 588 if (c >= 0x80) { 589 transchar_nonprint(buf, (char *)transchar_charbuf, c); 590 return (char *)transchar_charbuf; 591 } 592 return transchar_buf(buf, c); 593 } 594 595 /// Convert non-printable characters to 2..4 printable ones 596 /// 597 /// @warning Does not work for multi-byte characters, c must be <= 255. 
598 /// 599 /// @param[in] buf Required to check the file format 600 /// @param[out] charbuf Buffer to store result in, must be able to hold 601 /// at least 5 bytes (conversion result + NUL). 602 /// @param[in] c Character to convert. NUL is assumed to be NL according to 603 /// `:h NL-used-for-NUL`. 604 void transchar_nonprint(const buf_T *buf, char *charbuf, int c) 605 { 606 if (c == NL) { 607 // we use newline in place of a NUL 608 c = NUL; 609 } else if (buf != NULL && c == CAR && get_fileformat(buf) == EOL_MAC) { 610 // we use CR in place of NL in this case 611 c = NL; 612 } 613 assert(c <= 0xff); 614 615 if (dy_flags & kOptDyFlagUhex || c > 0x7f) { 616 // 'display' has "uhex" 617 transchar_hex(charbuf, c); 618 } else { 619 // 0x00 - 0x1f and 0x7f 620 charbuf[0] = '^'; 621 // DEL displayed as ^? 622 charbuf[1] = (char)(uint8_t)(c ^ 0x40); 623 624 charbuf[2] = NUL; 625 } 626 } 627 628 /// Convert a non-printable character to hex C string like "<FFFF>" 629 /// 630 /// @param[out] buf Buffer to store result in. 631 /// @param[in] c Character to convert. 632 /// 633 /// @return Number of bytes stored in buffer, excluding trailing NUL byte. 634 size_t transchar_hex(char *const buf, const int c) 635 FUNC_ATTR_NONNULL_ALL 636 { 637 size_t i = 0; 638 639 buf[i++] = '<'; 640 if (c > 0xFF) { 641 if (c > 0xFFFF) { 642 buf[i++] = (char)nr2hex((unsigned)c >> 20); 643 buf[i++] = (char)nr2hex((unsigned)c >> 16); 644 } 645 buf[i++] = (char)nr2hex((unsigned)c >> 12); 646 buf[i++] = (char)nr2hex((unsigned)c >> 8); 647 } 648 buf[i++] = (char)(nr2hex((unsigned)c >> 4)); 649 buf[i++] = (char)(nr2hex((unsigned)c)); 650 buf[i++] = '>'; 651 buf[i] = NUL; 652 return i; 653 } 654 655 /// Mirror text "str" for right-left displaying. 656 /// Only works for single-byte characters (e.g., numbers). 657 void rl_mirror_ascii(char *str, char *end) 658 { 659 for (char *p1 = str, *p2 = (end ? 
end : str + strlen(str)) - 1; p1 < p2; p1++, p2--) { 660 char t = *p1; 661 *p1 = *p2; 662 *p2 = t; 663 } 664 } 665 666 /// Convert the lower 4 bits of byte "c" to its hex character 667 /// 668 /// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or 669 /// function key 1. 670 /// 671 /// @param[in] n Number to convert. 672 /// 673 /// @return the hex character. 674 static inline unsigned nr2hex(unsigned n) 675 FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT 676 { 677 if ((n & 0xf) <= 9) { 678 return (n & 0xf) + '0'; 679 } 680 return (n & 0xf) - 10 + 'a'; 681 } 682 683 /// Return number of display cells occupied by byte "b". 684 /// 685 /// Caller must make sure 0 <= b <= 255. 686 /// For multi-byte mode "b" must be the first byte of a character. 687 /// A TAB is counted as two cells: "^I". 688 /// This will return 0 for bytes >= 0x80, because the number of 689 /// cells depends on further bytes in UTF-8. 690 /// 691 /// @param b 692 /// 693 /// @return Number of display cells. 694 int byte2cells(int b) 695 FUNC_ATTR_PURE 696 { 697 if (b >= 0x80) { 698 return 0; 699 } 700 return g_chartab[b] & CT_CELL_MASK; 701 } 702 703 /// Return number of display cells occupied by character "c". 704 /// 705 /// "c" can be a special key (negative number) in which case 3 or 4 is returned. 706 /// A TAB is counted as two cells: "^I" or four: "<09>". 707 /// 708 /// @param c 709 /// 710 /// @return Number of display cells. 711 int char2cells(int c) 712 { 713 if (IS_SPECIAL(c)) { 714 return char2cells(K_SECOND(c)) + 2; 715 } 716 717 if (c >= 0x80) { 718 // UTF-8: above 0x80 need to check the value 719 return utf_char2cells(c); 720 } 721 return g_chartab[c & 0xff] & CT_CELL_MASK; 722 } 723 724 /// Return number of display cells occupied by character at "*p". 725 /// A TAB is counted as two cells: "^I" or four: "<09>". 726 /// 727 /// @param p 728 /// 729 /// @return number of display cells. 
730 int ptr2cells(const char *p_in) 731 { 732 uint8_t *p = (uint8_t *)p_in; 733 // For UTF-8 we need to look at more bytes if the first byte is >= 0x80. 734 if (*p >= 0x80) { 735 return utf_ptr2cells(p_in); 736 } 737 738 // For DBCS we can tell the cell count from the first byte. 739 return g_chartab[*p] & CT_CELL_MASK; 740 } 741 742 /// Return the number of character cells string "s" will take on the screen, 743 /// counting TABs as two characters: "^I". 744 /// 745 /// 's' must be non-null. 746 /// 747 /// @param s 748 /// 749 /// @return number of character cells. 750 int vim_strsize(const char *s) 751 { 752 return vim_strnsize(s, MAXCOL); 753 } 754 755 /// Return the number of character cells string "s[len]" will take on the 756 /// screen, counting TABs as two characters: "^I". 757 /// 758 /// 's' must be non-null. 759 /// 760 /// @param s 761 /// @param len 762 /// 763 /// @return Number of character cells. 764 int vim_strnsize(const char *s, int len) 765 { 766 assert(s != NULL); 767 int size = 0; 768 while (*s != NUL && --len >= 0) { 769 int l = utfc_ptr2len(s); 770 size += ptr2cells(s); 771 s += l; 772 len -= l - 1; 773 } 774 return size; 775 } 776 777 /// Check that "c" is a normal identifier character: 778 /// Letters and characters from the 'isident' option. 779 /// 780 /// @param c character to check 781 bool vim_isIDc(int c) 782 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 783 { 784 return c > 0 && c < 0x100 && (g_chartab[c] & CT_ID_CHAR); 785 } 786 787 /// Check that "c" is a keyword character: 788 /// Letters and characters from 'iskeyword' option for the current buffer. 789 /// For multi-byte characters mb_get_class() is used (builtin rules). 790 /// 791 /// @param c character to check 792 bool vim_iswordc(const int c) 793 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 794 { 795 return vim_iswordc_buf(c, curbuf); 796 } 797 798 /// Check that "c" is a keyword character 799 /// Letters and characters from 'iskeyword' option for given buffer. 
800 /// For multi-byte characters mb_get_class() is used (builtin rules). 801 /// 802 /// @param[in] c Character to check. 803 /// @param[in] chartab Buffer chartab. 804 bool vim_iswordc_tab(const int c, const uint64_t *const chartab) 805 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 806 { 807 return (c >= 0x100 808 ? (utf_class_tab(c, chartab) >= 2) 809 : (c > 0 && GET_CHARTAB_TAB(chartab, c) != 0)); 810 } 811 812 /// Check that "c" is a keyword character: 813 /// Letters and characters from 'iskeyword' option for given buffer. 814 /// For multi-byte characters mb_get_class() is used (builtin rules). 815 /// 816 /// @param c character to check 817 /// @param buf buffer whose keywords to use 818 bool vim_iswordc_buf(const int c, buf_T *const buf) 819 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(2) 820 { 821 return vim_iswordc_tab(c, buf->b_chartab); 822 } 823 824 /// Just like vim_iswordc() but uses a pointer to the (multi-byte) character. 825 /// 826 /// @param p pointer to the multi-byte character 827 /// 828 /// @return true if "p" points to a keyword character. 829 bool vim_iswordp(const char *const p) 830 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 831 { 832 return vim_iswordp_buf(p, curbuf); 833 } 834 835 /// Just like vim_iswordc_buf() but uses a pointer to the (multi-byte) 836 /// character. 837 /// 838 /// @param p pointer to the multi-byte character 839 /// @param buf buffer whose keywords to use 840 /// 841 /// @return true if "p" points to a keyword character. 842 bool vim_iswordp_buf(const char *const p, buf_T *const buf) 843 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 844 { 845 int c = (uint8_t)(*p); 846 847 if (MB_BYTE2LEN(c) > 1) { 848 c = utf_ptr2char(p); 849 } 850 return vim_iswordc_buf(c, buf); 851 } 852 853 /// Check that "c" is a valid file-name character as specified with the 854 /// 'isfname' option. 
855 /// Assume characters above 0x100 are valid (multi-byte). 856 /// To be used for commands like "gf". 857 /// 858 /// @param c character to check 859 bool vim_isfilec(int c) 860 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 861 { 862 return c >= 0x100 || (c > 0 && (g_chartab[c] & CT_FNAME_CHAR)); 863 } 864 865 /// Check if "c" is a valid file-name character, including characters left 866 /// out of 'isfname' to make "gf" work, such as ',', ' ', '@', ':', etc. 867 bool vim_is_fname_char(int c) 868 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 869 { 870 return vim_isfilec(c) || c == ',' || c == ' ' || c == '@' || c == ':'; 871 } 872 873 /// Check that "c" is a valid file-name character or a wildcard character 874 /// Assume characters above 0x100 are valid (multi-byte). 875 /// Explicitly interpret ']' as a wildcard character as path_has_wildcard("]") 876 /// returns false. 877 /// 878 /// @param c character to check 879 bool vim_isfilec_or_wc(int c) 880 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 881 { 882 char buf[2]; 883 buf[0] = (char)c; 884 buf[1] = NUL; 885 return vim_isfilec(c) || c == ']' || path_has_wildcard(buf); 886 } 887 888 /// Check that "c" is a printable character. 889 /// 890 /// @param c character to check 891 bool vim_isprintc(int c) 892 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT 893 { 894 if (c >= 0x100) { 895 return utf_printable(c); 896 } 897 return c > 0 && (g_chartab[c] & CT_PRINT_CHAR); 898 } 899 900 /// skipwhite: skip over ' ' and '\t'. 901 /// 902 /// @param[in] p String to skip in. 903 /// 904 /// @return Pointer to character after the skipped whitespace. 905 char *skipwhite(const char *p) 906 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 907 FUNC_ATTR_NONNULL_RET 908 { 909 while (ascii_iswhite(*p)) { 910 p++; 911 } 912 return (char *)p; 913 } 914 915 /// Like `skipwhite`, but skip up to `len` characters. 916 /// @see skipwhite 917 /// 918 /// @param[in] p String to skip in. 919 /// @param[in] len Max length to skip. 
920 /// 921 /// @return Pointer to character after the skipped whitespace, or the `len`-th 922 /// character in the string. 923 char *skipwhite_len(const char *p, size_t len) 924 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 925 FUNC_ATTR_NONNULL_RET 926 { 927 for (; len > 0 && ascii_iswhite(*p); len--) { 928 p++; 929 } 930 return (char *)p; 931 } 932 933 // getwhitecols: return the number of whitespace 934 // columns (bytes) at the start of a given line 935 intptr_t getwhitecols_curline(void) 936 { 937 return getwhitecols(get_cursor_line_ptr()); 938 } 939 940 intptr_t getwhitecols(const char *p) 941 FUNC_ATTR_PURE 942 { 943 return skipwhite(p) - p; 944 } 945 946 /// Skip over digits 947 /// 948 /// @param[in] q String to skip digits in. 949 /// 950 /// @return Pointer to the character after the skipped digits. 951 char *skipdigits(const char *q) 952 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 953 FUNC_ATTR_NONNULL_RET 954 { 955 const char *p = q; 956 while (ascii_isdigit(*p)) { 957 // skip to next non-digit 958 p++; 959 } 960 return (char *)p; 961 } 962 963 /// skip over binary digits 964 /// 965 /// @param q pointer to string 966 /// 967 /// @return Pointer to the character after the skipped digits. 968 const char *skipbin(const char *q) 969 FUNC_ATTR_PURE 970 FUNC_ATTR_NONNULL_ALL 971 FUNC_ATTR_NONNULL_RET 972 { 973 const char *p = q; 974 while (ascii_isbdigit(*p)) { 975 // skip to next non-digit 976 p++; 977 } 978 return p; 979 } 980 981 /// skip over digits and hex characters 982 /// 983 /// @param q 984 /// 985 /// @return Pointer to the character after the skipped digits and hex 986 /// characters. 987 char *skiphex(char *q) 988 FUNC_ATTR_PURE 989 { 990 char *p = q; 991 while (ascii_isxdigit(*p)) { 992 // skip to next non-digit 993 p++; 994 } 995 return p; 996 } 997 998 /// skip to digit (or NUL after the string) 999 /// 1000 /// @param q 1001 /// 1002 /// @return Pointer to the digit or (NUL after the string). 
1003 char *skiptodigit(char *q) 1004 FUNC_ATTR_PURE 1005 { 1006 char *p = q; 1007 while (*p != NUL && !ascii_isdigit(*p)) { 1008 // skip to next digit 1009 p++; 1010 } 1011 return p; 1012 } 1013 1014 /// skip to binary character (or NUL after the string) 1015 /// 1016 /// @param q pointer to string 1017 /// 1018 /// @return Pointer to the binary character or (NUL after the string). 1019 const char *skiptobin(const char *q) 1020 FUNC_ATTR_PURE 1021 FUNC_ATTR_NONNULL_ALL 1022 FUNC_ATTR_NONNULL_RET 1023 { 1024 const char *p = q; 1025 while (*p != NUL && !ascii_isbdigit(*p)) { 1026 // skip to next digit 1027 p++; 1028 } 1029 return p; 1030 } 1031 1032 /// skip to hex character (or NUL after the string) 1033 /// 1034 /// @param q 1035 /// 1036 /// @return Pointer to the hex character or (NUL after the string). 1037 char *skiptohex(char *q) 1038 FUNC_ATTR_PURE 1039 { 1040 char *p = q; 1041 while (*p != NUL && !ascii_isxdigit(*p)) { 1042 // skip to next digit 1043 p++; 1044 } 1045 return p; 1046 } 1047 1048 /// Skip over text until ' ' or '\t' or NUL 1049 /// 1050 /// @param[in] p Text to skip over. 1051 /// 1052 /// @return Pointer to the next whitespace or NUL character. 1053 char *skiptowhite(const char *p) 1054 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 1055 { 1056 while (*p != ' ' && *p != '\t' && *p != NUL) { 1057 p++; 1058 } 1059 return (char *)p; 1060 } 1061 1062 /// skiptowhite_esc: Like skiptowhite(), but also skip escaped chars 1063 /// 1064 /// @param p 1065 /// 1066 /// @return Pointer to the next whitespace character. 1067 char *skiptowhite_esc(const char *p) 1068 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE 1069 { 1070 while (*p != ' ' && *p != '\t' && *p != NUL) { 1071 if (((*p == '\\') || (*p == Ctrl_V)) && (*(p + 1) != NUL)) { 1072 p++; 1073 } 1074 p++; 1075 } 1076 return (char *)p; 1077 } 1078 1079 /// Skip over text until '\n' or NUL. 1080 /// 1081 /// @param[in] p Text to skip over. 1082 /// 1083 /// @return Pointer to the next '\n' or NUL character. 
1084 char *skip_to_newline(const char *const p) 1085 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 1086 FUNC_ATTR_NONNULL_RET 1087 { 1088 return xstrchrnul(p, NL); 1089 } 1090 1091 /// Gets a number from a string and skips over it, signalling overflow. 1092 /// 1093 /// @param[out] pp A pointer to a pointer to char. 1094 /// It will be advanced past the read number. 1095 /// @param[out] nr Number read from the string. 1096 /// 1097 /// @return true on success, false on error/overflow 1098 bool try_getdigits(char **pp, intmax_t *nr) 1099 { 1100 errno = 0; 1101 *nr = strtoimax(*pp, pp, 10); 1102 if (errno == ERANGE && (*nr == INTMAX_MIN || *nr == INTMAX_MAX)) { 1103 return false; 1104 } 1105 return true; 1106 } 1107 1108 /// Gets a number from a string and skips over it. 1109 /// 1110 /// @param[out] pp Pointer to a pointer to char. 1111 /// It will be advanced past the read number. 1112 /// @param strict Abort on overflow. 1113 /// @param def Default value, if parsing fails or overflow occurs. 1114 /// 1115 /// @return Number read from the string, or `def` on parse failure or overflow. 1116 intmax_t getdigits(char **pp, bool strict, intmax_t def) 1117 { 1118 intmax_t number; 1119 int ok = try_getdigits(pp, &number); 1120 if (strict && !ok) { 1121 abort(); 1122 } 1123 return ok ? number : def; 1124 } 1125 1126 /// Gets an int number from a string. 1127 /// 1128 /// @see getdigits 1129 int getdigits_int(char **pp, bool strict, int def) 1130 { 1131 intmax_t number = getdigits(pp, strict, def); 1132 #if SIZEOF_INTMAX_T > SIZEOF_INT 1133 if (strict) { 1134 assert(number >= INT_MIN && number <= INT_MAX); 1135 } else if (!(number >= INT_MIN && number <= INT_MAX)) { 1136 return def; 1137 } 1138 #endif 1139 return (int)number; 1140 } 1141 1142 /// Gets a long number from a string. 
1143 /// 1144 /// @see getdigits 1145 long getdigits_long(char **pp, bool strict, long def) 1146 { 1147 intmax_t number = getdigits(pp, strict, def); 1148 #if SIZEOF_INTMAX_T > SIZEOF_LONG 1149 if (strict) { 1150 assert(number >= LONG_MIN && number <= LONG_MAX); 1151 } else if (!(number >= LONG_MIN && number <= LONG_MAX)) { 1152 return def; 1153 } 1154 #endif 1155 return (long)number; 1156 } 1157 1158 /// Gets a int32_t number from a string. 1159 /// 1160 /// @see getdigits 1161 int32_t getdigits_int32(char **pp, bool strict, int32_t def) 1162 { 1163 intmax_t number = getdigits(pp, strict, def); 1164 #if SIZEOF_INTMAX_T > 4 1165 if (strict) { 1166 assert(number >= INT32_MIN && number <= INT32_MAX); 1167 } else if (!(number >= INT32_MIN && number <= INT32_MAX)) { 1168 return def; 1169 } 1170 #endif 1171 return (int32_t)number; 1172 } 1173 1174 /// Check that "lbuf" is empty or only contains blanks. 1175 /// 1176 /// @param lbuf line buffer to check 1177 bool vim_isblankline(char *lbuf) 1178 FUNC_ATTR_PURE 1179 { 1180 char *p = skipwhite(lbuf); 1181 return *p == NUL || *p == '\r' || *p == '\n'; 1182 } 1183 1184 /// Convert a string into a long and/or unsigned long, taking care of 1185 /// hexadecimal, octal and binary numbers. Accepts a '-' sign. 1186 /// If "prep" is not NULL, returns a flag to indicate the type of the number: 1187 /// 0 decimal 1188 /// '0' octal 1189 /// 'O' octal 1190 /// 'o' octal 1191 /// 'B' bin 1192 /// 'b' bin 1193 /// 'X' hex 1194 /// 'x' hex 1195 /// If "len" is not NULL, the length of the number in characters is returned. 1196 /// If "nptr" is not NULL, the signed result is returned in it. 1197 /// If "unptr" is not NULL, the unsigned result is returned in it. 1198 /// If "what" contains STR2NR_BIN recognize binary numbers. 1199 /// If "what" contains STR2NR_OCT recognize octal numbers. 1200 /// If "what" contains STR2NR_HEX recognize hex numbers. 1201 /// If "what" contains STR2NR_FORCE always assume bin/oct/hex. 
/// If "what" contains STR2NR_QUOTE ignore embedded single quotes
/// If maxlen > 0, check at a maximum maxlen chars.
/// If strict is true, check the number strictly. return *len = 0 if fail.
///
/// Notes on the results:
/// - "*overflow" is only ever set to true, never cleared; the caller has to
///   initialize it.
/// - On overflow the unsigned value saturates at UVARNUMBER_MAX and the signed
///   value at VARNUMBER_MAX (or VARNUMBER_MIN for a negative number).
/// - When "nptr" is not NULL and a non-negative value exceeds VARNUMBER_MAX,
///   the value stored in "*unptr" is clamped to VARNUMBER_MAX as well.
///
/// @param start  String to parse, must not be NULL. May start with '-'.
/// @param prep  Returns guessed type of number 0 = decimal, 'x' or 'X' is
///              hexadecimal, '0', 'o' or 'O' is octal, 'b' or 'B' is binary.
///              When using STR2NR_FORCE is always zero.
/// @param len  Returns the detected length of number, including the sign and
///             the prefix. Reset to 0 on entry.
/// @param what  Recognizes what number passed, @see ChStr2NrFlags.
/// @param nptr  Returns the signed result.
/// @param unptr  Returns the unsigned result.
/// @param maxlen  Max length of string to check, 0 for no limit.
/// @param strict  If true, fail if the number has unexpected trailing
///                alphanumeric chars: *len is set to 0 and nothing else is
///                returned.
/// @param overflow  When not NULL, set to true for overflow.
void vim_str2nr(const char *const start, int *const prep, int *const len, const int what,
                varnumber_T *const nptr, uvarnumber_T *const unptr, const int maxlen,
                const bool strict, bool *const overflow)
  FUNC_ATTR_NONNULL_ARG(1)
{
  const char *ptr = start;
// True when "maxlen" limits the input and "ptr" is at or past that limit.
#define STRING_ENDED(ptr) \
  (!(maxlen == 0 || (int)((ptr) - start) < maxlen))
  int pre = 0;  // default is decimal
  const bool negative = (ptr[0] == '-');
  uvarnumber_T un = 0;

  // Report length 0 unless parsing gets past the strict check below.
  if (len != NULL) {
    *len = 0;
  }

  if (negative) {
    ptr++;
  }

  if (what & STR2NR_FORCE) {
    // When forcing main consideration is skipping the prefix. Decimal numbers
    // have no prefixes to skip. pre is not set.
    switch (what & ~(STR2NR_FORCE | STR2NR_QUOTE)) {
    case STR2NR_HEX:
      // The prefix is optional and only skipped when a digit follows it.
      if (!STRING_ENDED(ptr + 2)
          && ptr[0] == '0'
          && (ptr[1] == 'x' || ptr[1] == 'X')
          && ascii_isxdigit(ptr[2])) {
        ptr += 2;
      }
      goto vim_str2nr_hex;
    case STR2NR_BIN:
      if (!STRING_ENDED(ptr + 2)
          && ptr[0] == '0'
          && (ptr[1] == 'b' || ptr[1] == 'B')
          && ascii_isbdigit(ptr[2])) {
        ptr += 2;
      }
      goto vim_str2nr_bin;
    // Make STR2NR_OOCT work the same as STR2NR_OCT when forcing.
    case STR2NR_OCT:
    case STR2NR_OOCT:
    case STR2NR_OCT | STR2NR_OOCT:
      if (!STRING_ENDED(ptr + 2)
          && ptr[0] == '0'
          && (ptr[1] == 'o' || ptr[1] == 'O')
          && ascii_isodigit(ptr[2])) {
        ptr += 2;
      }
      goto vim_str2nr_oct;
    case 0:
      goto vim_str2nr_dec;
    default:
      // Forcing more than one base at once is not supported.
      abort();
    }
  } else if ((what & (STR2NR_HEX | STR2NR_OCT | STR2NR_OOCT | STR2NR_BIN))
             && !STRING_ENDED(ptr + 1) && ptr[0] == '0' && ptr[1] != '8'
             && ptr[1] != '9') {
    // Tentatively take the char after the '0' as the type flag; it is reset
    // below when no hex/binary/octal prefix is recognized.
    pre = (uint8_t)ptr[1];
    // Detect hexadecimal: 0x or 0X followed by hex digit.
    if ((what & STR2NR_HEX)
        && !STRING_ENDED(ptr + 2)
        && (pre == 'X' || pre == 'x')
        && ascii_isxdigit(ptr[2])) {
      ptr += 2;
      goto vim_str2nr_hex;
    }
    // Detect binary: 0b or 0B followed by 0 or 1.
    if ((what & STR2NR_BIN)
        && !STRING_ENDED(ptr + 2)
        && (pre == 'B' || pre == 'b')
        && ascii_isbdigit(ptr[2])) {
      ptr += 2;
      goto vim_str2nr_bin;
    }
    // Detect octal: 0o or 0O followed by octal digits (without '8' or '9').
    if ((what & STR2NR_OOCT)
        && !STRING_ENDED(ptr + 2)
        && (pre == 'O' || pre == 'o')
        && ascii_isodigit(ptr[2])) {
      ptr += 2;
      goto vim_str2nr_oct;
    }
    // Detect old octal format: 0 followed by octal digits.
    pre = 0;
    if (!(what & STR2NR_OCT)
        || !ascii_isodigit(ptr[1])) {
      goto vim_str2nr_dec;
    }
    // A single '8' or '9' anywhere in the digit run makes the number decimal.
    for (int i = 2; !STRING_ENDED(ptr + i) && ascii_isdigit(ptr[i]); i++) {
      if (ptr[i] > '7') {
        goto vim_str2nr_dec;
      }
    }
    pre = '0';
    goto vim_str2nr_oct;
  } else {
    goto vim_str2nr_dec;
  }

  // Do the conversion manually to avoid sscanf() quirks.
  abort();  // Should have used goto earlier.
// Accumulate digits into "un" for the given base.
//   base: numeric base of the digits
//   cond: expression that is true while "*ptr" is a valid digit
//   conv: expression giving the numeric value of "*ptr"
// With STR2NR_QUOTE a single quote is skipped when it is not the first char
// and is directly followed by a valid digit. "un" saturates at UVARNUMBER_MAX
// and "*overflow" is set when the value does not fit.
#define PARSE_NUMBER(base, cond, conv) \
  do { \
    const char *const after_prefix = ptr; \
    while (!STRING_ENDED(ptr)) { \
      if ((what & STR2NR_QUOTE) && ptr > after_prefix && *ptr == '\'') { \
        ptr++; \
        if (!STRING_ENDED(ptr) && (cond)) { \
          continue; \
        } \
        ptr--; \
      } \
      if (!(cond)) { \
        break; \
      } \
      const uvarnumber_T digit = (uvarnumber_T)(conv); \
      /* avoid ubsan error for overflow */ \
      if (un < UVARNUMBER_MAX / (base) \
          || (un == UVARNUMBER_MAX / (base) \
              && ((base) != 10 || digit <= UVARNUMBER_MAX % 10))) { \
        un = (base) * un + digit; \
      } else { \
        un = UVARNUMBER_MAX; \
        if (overflow != NULL) { \
          *overflow = true; \
        } \
      } \
      ptr++; \
    } \
  } while (0)
vim_str2nr_bin:
  PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_oct:
  PARSE_NUMBER(8, (ascii_isodigit(*ptr)), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_dec:
  PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0'));
  goto vim_str2nr_proceed;
vim_str2nr_hex:
  PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr)));
  goto vim_str2nr_proceed;
#undef PARSE_NUMBER

vim_str2nr_proceed:
  // Check for an alphanumeric character immediately following, that is
  // most likely a typo.
  // Returning here leaves *len at 0 and the other outputs untouched.
  if (strict && ptr - start != maxlen && ASCII_ISALNUM(*ptr)) {
    return;
  }

  if (prep != NULL) {
    *prep = pre;
  }

  if (len != NULL) {
    *len = (int)(ptr - start);
  }

  if (nptr != NULL) {
    if (negative) {  // account for leading '-' for decimal numbers
      // avoid ubsan error for overflow
      if (un > VARNUMBER_MAX) {
        *nptr = VARNUMBER_MIN;
        if (overflow != NULL) {
          *overflow = true;
        }
      } else {
        *nptr = -(varnumber_T)un;
      }
    } else {
      // Clamping "un" itself also changes what is stored in *unptr below.
      if (un > VARNUMBER_MAX) {
        un = VARNUMBER_MAX;
        if (overflow != NULL) {
          *overflow = true;
        }
      }
      *nptr = (varnumber_T)un;
    }
  }

  if (unptr != NULL) {
    *unptr = un;
  }
#undef STRING_ENDED
}

/// Return the value of a single hex character.
/// Only valid when the argument is '0' - '9', 'A' - 'F' or 'a' - 'f'.
/// Any other argument is not rejected: it is handled like a decimal digit and
/// gives "c - '0'".
///
/// @param c  Hex digit character.
///
/// @return The value of the hex character.
int hex2nr(int c)
  FUNC_ATTR_CONST
{
  if ((c >= 'a') && (c <= 'f')) {
    return c - 'a' + 10;
  }

  if ((c >= 'A') && (c <= 'F')) {
    return c - 'A' + 10;
  }
  return c - '0';
}

/// Convert two hex characters to a byte.
///
/// @param p  Points to the two characters to convert; the first one is the
///           high nibble.
///
/// @return -1 if one of the characters is not hex.
int hexhex2nr(const char *p)
  FUNC_ATTR_PURE
{
  if (!ascii_isxdigit(p[0]) || !ascii_isxdigit(p[1])) {
    return -1;
  }
  return (hex2nr(p[0]) << 4) + hex2nr(p[1]);
}

/// Check that "str" starts with a backslash that should be removed.
/// For Windows this is only done when the character after the
/// backslash is not a normal file name character.
/// '$' is a valid file name character, we don't remove the backslash before
/// it.  This means it is not possible to use an environment variable after a
/// backslash.  "C:\$VIM\doc" is taken literally, only "$VIM\doc" works.
1445 /// Although "\ name" is valid, the backslash in "Program\ files" must be 1446 /// removed. Assume a file name doesn't start with a space. 1447 /// For multi-byte names, never remove a backslash before a non-ascii 1448 /// character, assume that all multi-byte characters are valid file name 1449 /// characters. 1450 /// 1451 /// @param str file path string to check 1452 bool rem_backslash(const char *str) 1453 FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL 1454 { 1455 #ifdef BACKSLASH_IN_FILENAME 1456 return str[0] == '\\' 1457 && (uint8_t)str[1] < 0x80 1458 && (str[1] == ' ' 1459 || (str[1] != NUL 1460 && str[1] != '*' 1461 && str[1] != '?' 1462 && !vim_isfilec((uint8_t)str[1]))); 1463 1464 #else 1465 return str[0] == '\\' && str[1] != NUL; 1466 #endif 1467 } 1468 1469 /// Halve the number of backslashes in a file name argument. 1470 /// 1471 /// @param p 1472 void backslash_halve(char *p) 1473 { 1474 for (; *p && !rem_backslash(p); p++) {} 1475 if (*p != NUL) { 1476 char *dst = p; 1477 goto start; 1478 while (*p != NUL) { 1479 if (rem_backslash(p)) { 1480 start: 1481 *dst++ = *(p + 1); 1482 p += 2; 1483 } else { 1484 *dst++ = *p++; 1485 } 1486 } 1487 *dst = NUL; 1488 } 1489 } 1490 1491 /// backslash_halve() plus save the result in allocated memory. 1492 /// 1493 /// @param p 1494 /// 1495 /// @return String with the number of backslashes halved. 1496 char *backslash_halve_save(const char *p) 1497 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET 1498 { 1499 char *res = xmalloc(strlen(p) + 1); 1500 char *dst = res; 1501 while (*p != NUL) { 1502 if (rem_backslash(p)) { 1503 *dst++ = *(p + 1); 1504 p += 2; 1505 } else { 1506 *dst++ = *p++; 1507 } 1508 } 1509 *dst = NUL; 1510 return res; 1511 }