fileio.c (121639B)
1 // fileio.c: read from and write to a file 2 3 #include <assert.h> 4 #include <errno.h> 5 #include <fcntl.h> 6 #include <iconv.h> 7 #include <inttypes.h> 8 #include <limits.h> 9 #include <stdbool.h> 10 #include <stddef.h> 11 #include <stdio.h> 12 #include <string.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <time.h> 16 #include <uv.h> 17 18 #include "auto/config.h" 19 #include "nvim/ascii_defs.h" 20 #include "nvim/autocmd.h" 21 #include "nvim/autocmd_defs.h" 22 #include "nvim/buffer.h" 23 #include "nvim/buffer_defs.h" 24 #include "nvim/buffer_updates.h" 25 #include "nvim/change.h" 26 #include "nvim/cursor.h" 27 #include "nvim/diff.h" 28 #include "nvim/drawscreen.h" 29 #include "nvim/edit.h" 30 #include "nvim/errors.h" 31 #include "nvim/eval.h" 32 #include "nvim/eval/vars.h" 33 #include "nvim/ex_cmds_defs.h" 34 #include "nvim/ex_eval.h" 35 #include "nvim/fileio.h" 36 #include "nvim/fold.h" 37 #include "nvim/garray.h" 38 #include "nvim/garray_defs.h" 39 #include "nvim/getchar.h" 40 #include "nvim/gettext_defs.h" 41 #include "nvim/globals.h" 42 #include "nvim/highlight_defs.h" 43 #include "nvim/iconv_defs.h" 44 #include "nvim/log.h" 45 #include "nvim/macros_defs.h" 46 #include "nvim/mbyte.h" 47 #include "nvim/mbyte_defs.h" 48 #include "nvim/memfile.h" 49 #include "nvim/memfile_defs.h" 50 #include "nvim/memline.h" 51 #include "nvim/memline_defs.h" 52 #include "nvim/memory.h" 53 #include "nvim/message.h" 54 #include "nvim/move.h" 55 #include "nvim/option.h" 56 #include "nvim/option_defs.h" 57 #include "nvim/option_vars.h" 58 #include "nvim/os/fs.h" 59 #include "nvim/os/fs_defs.h" 60 #include "nvim/os/input.h" 61 #include "nvim/os/os.h" 62 #include "nvim/os/os_defs.h" 63 #include "nvim/os/time.h" 64 #include "nvim/path.h" 65 #include "nvim/pos_defs.h" 66 #include "nvim/regexp.h" 67 #include "nvim/regexp_defs.h" 68 #include "nvim/sha256.h" 69 #include "nvim/shada.h" 70 #include "nvim/state_defs.h" 71 #include "nvim/strings.h" 72 #include 
"nvim/types_defs.h" 73 #include "nvim/ui.h" 74 #include "nvim/undo.h" 75 #include "nvim/undo_defs.h" 76 #include "nvim/vim_defs.h" 77 78 #ifdef BACKSLASH_IN_FILENAME 79 # include "nvim/charset.h" 80 #endif 81 82 #ifdef HAVE_DIRFD_AND_FLOCK 83 # include <dirent.h> 84 # include <sys/file.h> 85 #endif 86 87 #ifdef OPEN_CHR_FILES 88 # include "nvim/charset.h" 89 #endif 90 91 // For compatibility with libuv < 1.20.0 (tested on 1.18.0) 92 #ifndef UV_FS_COPYFILE_FICLONE 93 # define UV_FS_COPYFILE_FICLONE 0 94 #endif 95 96 #include "fileio.c.generated.h" 97 98 static const char *e_auchangedbuf = N_("E812: Autocommands changed buffer or buffer name"); 99 100 void filemess(buf_T *buf, char *name, char *s) 101 { 102 int prev_msg_col = msg_col; 103 104 if (msg_silent != 0) { 105 return; 106 } 107 108 add_quoted_fname(IObuff, IOSIZE - 100, buf, name); 109 110 // Avoid an over-long translation to cause trouble. 111 xstrlcat(IObuff, s, IOSIZE); 112 113 // For the first message may have to start a new line. 114 // For further ones overwrite the previous one, reset msg_scroll before 115 // calling filemess(). 116 int msg_scroll_save = msg_scroll; 117 if (shortmess(SHM_OVERALL) && !msg_listdo_overwrite && !exiting && p_verbose == 0) { 118 msg_scroll = false; 119 } 120 if (!msg_scroll) { // wait a bit when overwriting an error msg 121 msg_check_for_delay(false); 122 } 123 msg_start(); 124 if (prev_msg_col != 0 && msg_col == 0) { 125 msg_putchar('\r'); // overwrite any previous message. 126 } 127 msg_scroll = msg_scroll_save; 128 msg_scrolled_ign = true; 129 // may truncate the message to avoid a hit-return prompt 130 msg_outtrans(msg_may_trunc(false, IObuff), 0, false); 131 msg_clr_eos(); 132 ui_flush(); 133 msg_scrolled_ign = false; 134 } 135 136 /// Read lines from file "fname" into the buffer after line "from". 137 /// 138 /// 1. We allocate blocks with try_malloc, as big as possible. 139 /// 2. Each block is filled with characters from the file with a single read(). 140 /// 3. 
The lines are inserted in the buffer with ml_append(). 141 /// 142 /// (caller must check that fname != NULL, unless READ_STDIN is used) 143 /// 144 /// "lines_to_skip" is the number of lines that must be skipped 145 /// "lines_to_read" is the number of lines that are appended 146 /// When not recovering lines_to_skip is 0 and lines_to_read MAXLNUM. 147 /// 148 /// flags: 149 /// READ_NEW starting to edit a new buffer 150 /// READ_FILTER reading filter output 151 /// READ_STDIN read from stdin instead of a file 152 /// READ_BUFFER read from curbuf instead of a file (converting after reading 153 /// stdin) 154 /// READ_NOFILE do not read a file, only trigger BufReadCmd 155 /// READ_DUMMY read into a dummy buffer (to check if file contents changed) 156 /// READ_KEEP_UNDO don't clear undo info or read it from a file 157 /// READ_FIFO read from fifo/socket instead of a file 158 /// 159 /// @param eap can be NULL! 160 /// 161 /// @return FAIL for failure, NOTDONE for directory (failure), or OK 162 int readfile(char *fname, char *sfname, linenr_T from, linenr_T lines_to_skip, 163 linenr_T lines_to_read, exarg_T *eap, int flags, bool silent) 164 { 165 int retval = FAIL; // jump to "theend" instead of returning 166 int fd = stdin_fd >= 0 ? 
stdin_fd : 0; 167 bool newfile = (flags & READ_NEW); 168 bool filtering = (flags & READ_FILTER); 169 bool read_stdin = (flags & READ_STDIN); 170 bool read_buffer = (flags & READ_BUFFER); 171 bool read_fifo = (flags & READ_FIFO); 172 bool set_options = newfile || read_buffer || (eap != NULL && eap->read_edit); 173 linenr_T read_buf_lnum = 1; // next line to read from curbuf 174 colnr_T read_buf_col = 0; // next char to read from this line 175 char c; 176 linenr_T lnum = from; 177 char *ptr = NULL; // pointer into read buffer 178 char *buffer = NULL; // read buffer 179 char *new_buffer = NULL; // init to shut up gcc 180 char *line_start = NULL; // init to shut up gcc 181 int wasempty; // buffer was empty before reading 182 colnr_T len; 183 ptrdiff_t size = 0; 184 uint8_t *p = NULL; 185 off_T filesize = 0; 186 bool skip_read = false; 187 context_sha256_T sha_ctx; 188 bool read_undo_file = false; 189 int split = 0; // number of split lines 190 linenr_T linecnt; 191 bool error = false; // errors encountered 192 int ff_error = EOL_UNKNOWN; // file format with errors 193 ptrdiff_t linerest = 0; // remaining chars in line 194 int perm = 0; 195 #ifdef UNIX 196 int swap_mode = -1; // protection bits for swap file 197 #endif 198 int fileformat = 0; // end-of-line format 199 bool keep_fileformat = false; 200 FileInfo file_info; 201 linenr_T skip_count = 0; 202 linenr_T read_count = 0; 203 int msg_save = msg_scroll; 204 linenr_T read_no_eol_lnum = 0; // non-zero lnum when last line of 205 // last read was missing the eol 206 bool file_rewind = false; 207 linenr_T conv_error = 0; // line nr with conversion error 208 linenr_T illegal_byte = 0; // line nr with illegal byte 209 bool keep_dest_enc = false; // don't retry when char doesn't fit 210 // in destination encoding 211 int bad_char_behavior = BAD_REPLACE; 212 // BAD_KEEP, BAD_DROP or character to 213 // replace with 214 char *tmpname = NULL; // name of 'charconvert' output file 215 int fio_flags = 0; 216 char *fenc; // 
fileencoding to use 217 bool fenc_alloced; // fenc_next is in allocated memory 218 char *fenc_next = NULL; // next item in 'fencs' or NULL 219 bool advance_fenc = false; 220 int real_size = 0; 221 iconv_t iconv_fd = (iconv_t)-1; // descriptor for iconv() or -1 222 bool did_iconv = false; // true when iconv() failed and trying 223 // 'charconvert' next 224 bool converted = false; // true if conversion done 225 bool notconverted = false; // true if conversion wanted but it wasn't possible 226 char conv_rest[CONV_RESTLEN]; 227 int conv_restlen = 0; // nr of bytes in conv_rest[] 228 pos_T orig_start; 229 buf_T *old_curbuf; 230 char *old_b_ffname; 231 char *old_b_fname; 232 int using_b_ffname; 233 int using_b_fname; 234 static char *msg_is_a_directory = N_("is a directory"); 235 236 curbuf->b_au_did_filetype = false; // reset before triggering any autocommands 237 238 curbuf->b_no_eol_lnum = 0; // in case it was set by the previous read 239 240 // If there is no file name yet, use the one for the read file. 241 // BF_NOTEDITED is set to reflect this. 242 // Don't do this for a read from a filter. 243 // Only do this when 'cpoptions' contains the 'f' flag. 244 if (curbuf->b_ffname == NULL 245 && !filtering 246 && fname != NULL 247 && vim_strchr(p_cpo, CPO_FNAMER) != NULL 248 && !(flags & READ_DUMMY)) { 249 if (set_rw_fname(fname, sfname) == FAIL) { 250 goto theend; 251 } 252 } 253 254 // Remember the initial values of curbuf, curbuf->b_ffname and 255 // curbuf->b_fname to detect whether they are altered as a result of 256 // executing nasty autocommands. Also check if "fname" and "sfname" 257 // point to one of these values. 
258 old_curbuf = curbuf; 259 old_b_ffname = curbuf->b_ffname; 260 old_b_fname = curbuf->b_fname; 261 using_b_ffname = (fname == curbuf->b_ffname) || (sfname == curbuf->b_ffname); 262 using_b_fname = (fname == curbuf->b_fname) || (sfname == curbuf->b_fname); 263 264 // After reading a file the cursor line changes but we don't want to 265 // display the line. 266 ex_no_reprint = true; 267 268 // don't display the file info for another buffer now 269 need_fileinfo = false; 270 271 // For Unix: Use the short file name whenever possible. 272 // Avoids problems with networks and when directory names are changed. 273 // Don't do this for Windows, a "cd" in a sub-shell may have moved us to 274 // another directory, which we don't detect. 275 if (sfname == NULL) { 276 sfname = fname; 277 } 278 #if defined(UNIX) 279 fname = sfname; 280 #endif 281 282 // The BufReadCmd and FileReadCmd events intercept the reading process by 283 // executing the associated commands instead. 284 if (!filtering && !read_stdin && !read_buffer) { 285 orig_start = curbuf->b_op_start; 286 287 // Set '[ mark to the line above where the lines go (line 1 if zero). 288 curbuf->b_op_start.lnum = ((from == 0) ? 1 : from); 289 curbuf->b_op_start.col = 0; 290 291 if (newfile) { 292 if (apply_autocmds_exarg(EVENT_BUFREADCMD, NULL, sfname, 293 false, curbuf, eap)) { 294 retval = OK; 295 if (aborting()) { 296 retval = FAIL; 297 } 298 299 // The BufReadCmd code usually uses ":read" to get the text and 300 // perhaps ":file" to change the buffer name. But we should 301 // consider this to work like ":edit", thus reset the 302 // BF_NOTEDITED flag. Then ":write" will work to overwrite the 303 // same file. 304 if (retval == OK) { 305 curbuf->b_flags &= ~BF_NOTEDITED; 306 } 307 goto theend; 308 } 309 } else if (apply_autocmds_exarg(EVENT_FILEREADCMD, sfname, sfname, 310 false, NULL, eap)) { 311 retval = aborting() ? 
FAIL : OK; 312 goto theend; 313 } 314 315 curbuf->b_op_start = orig_start; 316 317 if (flags & READ_NOFILE) { 318 // Return NOTDONE instead of FAIL so that BufEnter can be triggered 319 // and other operations don't fail. 320 retval = NOTDONE; 321 goto theend; 322 } 323 } 324 325 if (((shortmess(SHM_OVER) && !msg_listdo_overwrite) || curbuf->b_help) && p_verbose == 0) { 326 msg_scroll = false; // overwrite previous file message 327 } else { 328 msg_scroll = true; // don't overwrite previous file message 329 } 330 // If the name is too long we might crash further on, quit here. 331 if (fname != NULL && *fname != NUL) { 332 size_t fnamelen = strlen(fname); 333 334 // If the name is too long we might crash further on, quit here. 335 if (fnamelen >= MAXPATHL) { 336 filemess(curbuf, fname, _("Illegal file name")); 337 msg_end(); 338 msg_scroll = msg_save; 339 goto theend; 340 } 341 342 // If the name ends in a path separator, we can't open it. Check here, 343 // because reading the file may actually work, but then creating the 344 // swap file may destroy it! Reported on MS-DOS and Win 95. 345 if (after_pathsep(fname, fname + fnamelen)) { 346 if (!silent) { 347 filemess(curbuf, fname, _(msg_is_a_directory)); 348 } 349 msg_end(); 350 msg_scroll = msg_save; 351 retval = NOTDONE; 352 goto theend; 353 } 354 } 355 356 if (!read_stdin && fname != NULL) { 357 perm = os_getperm(fname); 358 } 359 360 #ifdef OPEN_CHR_FILES 361 # define IS_CHR_DEV(perm, fname) S_ISCHR(perm) && is_dev_fd_file(fname) 362 #else 363 # define IS_CHR_DEV(perm, fname) false 364 #endif 365 366 if (!read_stdin && !read_buffer && !read_fifo) { 367 if (perm >= 0 && !S_ISREG(perm) // not a regular file ... 368 && !S_ISFIFO(perm) // ... or fifo 369 && !S_ISSOCK(perm) // ... or socket 370 && !(IS_CHR_DEV(perm, fname)) 371 // ... or a character special file named /dev/fd/<n> 372 ) { 373 // On Unix it is possible to read a directory, so we have to 374 // check for it before os_open(). 
375 if (S_ISDIR(perm)) { 376 if (!silent) { 377 filemess(curbuf, fname, _(msg_is_a_directory)); 378 } 379 retval = NOTDONE; 380 } else { 381 filemess(curbuf, fname, _("is not a file")); 382 } 383 msg_end(); 384 msg_scroll = msg_save; 385 goto theend; 386 } 387 } 388 389 // Set default or forced 'fileformat' and 'binary'. 390 set_file_options(set_options, eap); 391 392 // When opening a new file we take the readonly flag from the file. 393 // Default is r/w, can be set to r/o below. 394 // Don't reset it when in readonly mode 395 // Only set/reset b_p_ro when BF_CHECK_RO is set. 396 bool check_readonly = (newfile && (curbuf->b_flags & BF_CHECK_RO)); 397 if (check_readonly && !readonlymode) { 398 curbuf->b_p_ro = false; 399 } 400 401 if (newfile && !read_stdin && !read_buffer && !read_fifo) { 402 // Remember time of file. 403 if (os_fileinfo(fname, &file_info)) { 404 buf_store_file_info(curbuf, &file_info); 405 curbuf->b_mtime_read = curbuf->b_mtime; 406 curbuf->b_mtime_read_ns = curbuf->b_mtime_ns; 407 #ifdef UNIX 408 // Use the protection bits of the original file for the swap file. 409 // This makes it possible for others to read the name of the 410 // edited file from the swapfile, but only if they can read the 411 // edited file. 412 // Remove the "write" and "execute" bits for group and others 413 // (they must not write the swapfile). 414 // Add the "read" and "write" bits for the user, otherwise we may 415 // not be able to write to the file ourselves. 416 // Setting the bits is done below, after creating the swap file. 417 swap_mode = ((int)file_info.stat.st_mode & 0644) | 0600; 418 #endif 419 } else { 420 curbuf->b_mtime = 0; 421 curbuf->b_mtime_ns = 0; 422 curbuf->b_mtime_read = 0; 423 curbuf->b_mtime_read_ns = 0; 424 curbuf->b_orig_size = 0; 425 curbuf->b_orig_mode = 0; 426 } 427 428 // Reset the "new file" flag. It will be set again below when the 429 // file doesn't exist. 430 curbuf->b_flags &= ~(BF_NEW | BF_NEW_W); 431 } 432 433 // Check readonly. 
434 bool file_readonly = false; 435 if (!read_buffer && !read_stdin) { 436 if (!newfile || readonlymode || !(perm & 0222) 437 || !os_file_is_writable(fname)) { 438 file_readonly = true; 439 } 440 fd = os_open(fname, O_RDONLY, 0); 441 } 442 443 if (fd < 0) { // cannot open at all 444 msg_scroll = msg_save; 445 if (!newfile) { 446 goto theend; 447 } 448 if (perm == UV_ENOENT) { // check if the file exists 449 // Set the 'new-file' flag, so that when the file has 450 // been created by someone else, a ":w" will complain. 451 curbuf->b_flags |= BF_NEW; 452 453 // Create a swap file now, so that other Vims are warned 454 // that we are editing this file. Don't do this for a 455 // "nofile" or "nowrite" buffer type. 456 if (!bt_dontwrite(curbuf)) { 457 check_need_swap(newfile); 458 // SwapExists autocommand may mess things up 459 if (curbuf != old_curbuf 460 || (using_b_ffname 461 && (old_b_ffname != curbuf->b_ffname)) 462 || (using_b_fname 463 && (old_b_fname != curbuf->b_fname))) { 464 emsg(_(e_auchangedbuf)); 465 goto theend; 466 } 467 } 468 if (!silent) { 469 if (dir_of_file_exists(fname)) { 470 filemess(curbuf, sfname, _("[New]")); 471 } else { 472 filemess(curbuf, sfname, _("[New DIRECTORY]")); 473 } 474 } 475 // Even though this is a new file, it might have been 476 // edited before and deleted. Get the old marks. 477 check_marks_read(); 478 // Set forced 'fileencoding'. 479 if (eap != NULL) { 480 set_forced_fenc(eap); 481 } 482 apply_autocmds_exarg(EVENT_BUFNEWFILE, sfname, sfname, 483 false, curbuf, eap); 484 // remember the current fileformat 485 save_file_ff(curbuf); 486 487 if (!aborting()) { // autocmds may abort script processing 488 retval = OK; // a new file is not an error 489 } 490 goto theend; 491 } 492 #if defined(UNIX) && defined(EOVERFLOW) 493 filemess(curbuf, sfname, ((fd == UV_EFBIG) ? _("[File too big]") 494 : 495 // libuv only returns -errno 496 // in Unix and in Windows 497 // open() does not set 498 // EOVERFLOW 499 (fd == -EOVERFLOW) ? 
_("[File too big]") 500 : _("[Permission Denied]"))); 501 #else 502 filemess(curbuf, sfname, ((fd == UV_EFBIG) ? _("[File too big]") 503 : _("[Permission Denied]"))); 504 #endif 505 curbuf->b_p_ro = true; // must use "w!" now 506 507 goto theend; 508 } 509 510 // Only set the 'ro' flag for readonly files the first time they are 511 // loaded. Help files always get readonly mode 512 if ((check_readonly && file_readonly) || curbuf->b_help) { 513 curbuf->b_p_ro = true; 514 } 515 516 if (set_options) { 517 // Don't change 'eol' if reading from buffer as it will already be 518 // correctly set when reading stdin. 519 if (!read_buffer) { 520 curbuf->b_p_eof = false; 521 curbuf->b_start_eof = false; 522 curbuf->b_p_eol = true; 523 curbuf->b_start_eol = true; 524 } 525 curbuf->b_p_bomb = false; 526 curbuf->b_start_bomb = false; 527 } 528 529 // Create a swap file now, so that other Vims are warned that we are 530 // editing this file. 531 // Don't do this for a "nofile" or "nowrite" buffer type. 532 if (!bt_dontwrite(curbuf)) { 533 check_need_swap(newfile); 534 if (!read_stdin 535 && (curbuf != old_curbuf 536 || (using_b_ffname && (old_b_ffname != curbuf->b_ffname)) 537 || (using_b_fname && (old_b_fname != curbuf->b_fname)))) { 538 emsg(_(e_auchangedbuf)); 539 if (!read_buffer) { 540 close(fd); 541 } 542 goto theend; 543 } 544 #ifdef UNIX 545 // Set swap file protection bits after creating it. 546 if (swap_mode > 0 && curbuf->b_ml.ml_mfp != NULL 547 && curbuf->b_ml.ml_mfp->mf_fname != NULL) { 548 const char *swap_fname = curbuf->b_ml.ml_mfp->mf_fname; 549 550 // If the group-read bit is set but not the world-read bit, then 551 // the group must be equal to the group of the original file. If 552 // we can't make that happen then reset the group-read bit. This 553 // avoids making the swap file readable to more users when the 554 // primary group of the user is too permissive. 
555 if ((swap_mode & 044) == 040) { 556 FileInfo swap_info; 557 558 if (os_fileinfo(swap_fname, &swap_info) 559 && file_info.stat.st_gid != swap_info.stat.st_gid 560 && os_fchown(curbuf->b_ml.ml_mfp->mf_fd, (uv_uid_t)(-1), 561 (uv_gid_t)file_info.stat.st_gid) 562 == -1) { 563 swap_mode &= 0600; 564 } 565 } 566 567 os_setperm(swap_fname, swap_mode); 568 } 569 #endif 570 } 571 572 // If "Quit" selected at ATTENTION dialog, don't load the file. 573 if (swap_exists_action == SEA_QUIT) { 574 if (!read_buffer && !read_stdin) { 575 close(fd); 576 } 577 goto theend; 578 } 579 580 no_wait_return++; // don't wait for return yet 581 582 // Set '[ mark to the line above where the lines go (line 1 if zero). 583 orig_start = curbuf->b_op_start; 584 curbuf->b_op_start.lnum = ((from == 0) ? 1 : from); 585 curbuf->b_op_start.col = 0; 586 587 int try_mac = (vim_strchr(p_ffs, 'm') != NULL); 588 int try_dos = (vim_strchr(p_ffs, 'd') != NULL); 589 int try_unix = (vim_strchr(p_ffs, 'x') != NULL); 590 591 if (!read_buffer) { 592 int m = msg_scroll; 593 int n = msg_scrolled; 594 595 // The file must be closed again, the autocommands may want to change 596 // the file before reading it. 597 if (!read_stdin) { 598 close(fd); // ignore errors 599 } 600 601 // The output from the autocommands should not overwrite anything and 602 // should not be overwritten: Set msg_scroll, restore its value if no 603 // output was done. 
604 msg_scroll = true; 605 if (filtering) { 606 apply_autocmds_exarg(EVENT_FILTERREADPRE, NULL, sfname, 607 false, curbuf, eap); 608 } else if (read_stdin) { 609 apply_autocmds_exarg(EVENT_STDINREADPRE, NULL, sfname, 610 false, curbuf, eap); 611 } else if (newfile) { 612 apply_autocmds_exarg(EVENT_BUFREADPRE, NULL, sfname, 613 false, curbuf, eap); 614 } else { 615 apply_autocmds_exarg(EVENT_FILEREADPRE, sfname, sfname, 616 false, NULL, eap); 617 } 618 619 // autocommands may have changed it 620 try_mac = (vim_strchr(p_ffs, 'm') != NULL); 621 try_dos = (vim_strchr(p_ffs, 'd') != NULL); 622 try_unix = (vim_strchr(p_ffs, 'x') != NULL); 623 curbuf->b_op_start = orig_start; 624 625 if (msg_scrolled == n) { 626 msg_scroll = m; 627 } 628 629 if (aborting()) { // autocmds may abort script processing 630 no_wait_return--; 631 msg_scroll = msg_save; 632 curbuf->b_p_ro = true; // must use "w!" now 633 goto theend; 634 } 635 // Don't allow the autocommands to change the current buffer. 636 // Try to re-open the file. 637 // 638 // Don't allow the autocommands to change the buffer name either 639 // (cd for example) if it invalidates fname or sfname. 640 if (!read_stdin && (curbuf != old_curbuf 641 || (using_b_ffname && (old_b_ffname != curbuf->b_ffname)) 642 || (using_b_fname && (old_b_fname != curbuf->b_fname)) 643 || (fd = os_open(fname, O_RDONLY, 0)) < 0)) { 644 no_wait_return--; 645 msg_scroll = msg_save; 646 if (fd < 0) { 647 emsg(_("E200: *ReadPre autocommands made the file unreadable")); 648 } else { 649 emsg(_("E201: *ReadPre autocommands must not change current buffer")); 650 } 651 curbuf->b_p_ro = true; // must use "w!" 
now 652 goto theend; 653 } 654 } 655 656 // Autocommands may add lines to the file, need to check if it is empty 657 wasempty = (curbuf->b_ml.ml_flags & ML_EMPTY); 658 659 if (!recoverymode && !filtering && !(flags & READ_DUMMY) && !silent) { 660 if (!read_stdin && !read_buffer) { 661 filemess(curbuf, sfname, ""); 662 } 663 } 664 665 msg_scroll = false; // overwrite the file message 666 667 // Set linecnt now, before the "retry" caused by a wrong guess for 668 // fileformat, and after the autocommands, which may change them. 669 linecnt = curbuf->b_ml.ml_line_count; 670 671 // "++bad=" argument. 672 if (eap != NULL && eap->bad_char != 0) { 673 bad_char_behavior = eap->bad_char; 674 if (set_options) { 675 curbuf->b_bad_char = eap->bad_char; 676 } 677 } else { 678 curbuf->b_bad_char = 0; 679 } 680 681 // Decide which 'encoding' to use or use first. 682 if (eap != NULL && eap->force_enc != 0) { 683 fenc = enc_canonize(eap->cmd + eap->force_enc); 684 fenc_alloced = true; 685 keep_dest_enc = true; 686 } else if (curbuf->b_p_bin) { 687 fenc = ""; // binary: don't convert 688 fenc_alloced = false; 689 } else if (curbuf->b_help) { 690 // Help files are either utf-8 or latin1. Try utf-8 first, if this 691 // fails it must be latin1. 692 // It is needed when the first line contains non-ASCII characters. 693 // That is only in *.??x files. 694 fenc_next = "latin1"; 695 fenc = "utf-8"; 696 697 fenc_alloced = false; 698 } else if (*p_fencs == NUL) { 699 fenc = curbuf->b_p_fenc; // use format from buffer 700 fenc_alloced = false; 701 } else { 702 fenc_next = p_fencs; // try items in 'fileencodings' 703 fenc = next_fenc(&fenc_next, &fenc_alloced); 704 } 705 706 // Jump back here to retry reading the file in different ways. 
707 // Reasons to retry: 708 // - encoding conversion failed: try another one from "fenc_next" 709 // - BOM detected and fenc was set, need to setup conversion 710 // - "fileformat" check failed: try another 711 // 712 // Variables set for special retry actions: 713 // "file_rewind" Rewind the file to start reading it again. 714 // "advance_fenc" Advance "fenc" using "fenc_next". 715 // "skip_read" Re-use already read bytes (BOM detected). 716 // "did_iconv" iconv() conversion failed, try 'charconvert'. 717 // "keep_fileformat" Don't reset "fileformat". 718 // 719 // Other status indicators: 720 // "tmpname" When != NULL did conversion with 'charconvert'. 721 // Output file has to be deleted afterwards. 722 // "iconv_fd" When != -1 did conversion with iconv(). 723 retry: 724 725 if (file_rewind) { 726 if (read_buffer) { 727 read_buf_lnum = 1; 728 read_buf_col = 0; 729 } else if (read_stdin || vim_lseek(fd, 0, SEEK_SET) != 0) { 730 // Can't rewind the file, give up. 731 error = true; 732 goto failed; 733 } 734 // Delete the previously read lines. 735 while (lnum > from) { 736 ml_delete(lnum--); 737 } 738 file_rewind = false; 739 if (set_options) { 740 curbuf->b_p_bomb = false; 741 curbuf->b_start_bomb = false; 742 } 743 conv_error = 0; 744 } 745 746 // When retrying with another "fenc" and the first time "fileformat" 747 // will be reset. 
748 if (keep_fileformat) { 749 keep_fileformat = false; 750 } else { 751 if (eap != NULL && eap->force_ff != 0) { 752 fileformat = get_fileformat_force(curbuf, eap); 753 try_unix = try_dos = try_mac = false; 754 } else if (curbuf->b_p_bin) { 755 fileformat = EOL_UNIX; // binary: use Unix format 756 } else if (*p_ffs == 757 NUL) { 758 fileformat = get_fileformat(curbuf); // use format from buffer 759 } else { 760 fileformat = EOL_UNKNOWN; // detect from file 761 } 762 } 763 764 if (iconv_fd != (iconv_t)-1) { 765 // aborted conversion with iconv(), close the descriptor 766 iconv_close(iconv_fd); 767 iconv_fd = (iconv_t)-1; 768 } 769 770 if (advance_fenc) { 771 // Try the next entry in 'fileencodings'. 772 advance_fenc = false; 773 774 if (eap != NULL && eap->force_enc != 0) { 775 // Conversion given with "++cc=" wasn't possible, read 776 // without conversion. 777 notconverted = true; 778 conv_error = 0; 779 if (fenc_alloced) { 780 xfree(fenc); 781 } 782 fenc = ""; 783 fenc_alloced = false; 784 } else { 785 if (fenc_alloced) { 786 xfree(fenc); 787 } 788 if (fenc_next != NULL) { 789 fenc = next_fenc(&fenc_next, &fenc_alloced); 790 } else { 791 fenc = ""; 792 fenc_alloced = false; 793 } 794 } 795 if (tmpname != NULL) { 796 os_remove(tmpname); // delete converted file 797 XFREE_CLEAR(tmpname); 798 } 799 } 800 801 // Conversion may be required when the encoding of the file is different 802 // from 'encoding' or 'encoding' is UTF-16, UCS-2 or UCS-4. 803 fio_flags = 0; 804 converted = need_conversion(fenc); 805 if (converted) { 806 // "ucs-bom" means we need to check the first bytes of the file 807 // for a BOM. 808 if (strcmp(fenc, ENC_UCSBOM) == 0) { 809 fio_flags = FIO_UCSBOM; 810 } else { 811 // Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be 812 // done. This is handled below after read(). Prepare the 813 // fio_flags to avoid having to parse the string each time. 
814 // Also check for Unicode to Latin1 conversion, because iconv() 815 // appears not to handle this correctly. This works just like 816 // conversion to UTF-8 except how the resulting character is put in 817 // the buffer. 818 fio_flags = get_fio_flags(fenc); 819 } 820 821 // Try using iconv() if we can't convert internally. 822 if (fio_flags == 0 823 && !did_iconv) { 824 iconv_fd = (iconv_t)my_iconv_open("utf-8", fenc); 825 } 826 827 // Use the 'charconvert' expression when conversion is required 828 // and we can't do it internally or with iconv(). 829 if (fio_flags == 0 && !read_stdin && !read_buffer && *p_ccv != NUL 830 && !read_fifo && iconv_fd == (iconv_t)-1) { 831 did_iconv = false; 832 // Skip conversion when it's already done (retry for wrong 833 // "fileformat"). 834 if (tmpname == NULL) { 835 tmpname = readfile_charconvert(fname, fenc, &fd); 836 if (tmpname == NULL) { 837 // Conversion failed. Try another one. 838 advance_fenc = true; 839 if (fd < 0) { 840 // Re-opening the original file failed! 841 emsg(_("E202: Conversion made file unreadable!")); 842 error = true; 843 goto failed; 844 } 845 goto retry; 846 } 847 } 848 } else { 849 if (fio_flags == 0 && iconv_fd == (iconv_t)-1) { 850 // Conversion wanted but we can't. 851 // Try the next conversion in 'fileencodings' 852 advance_fenc = true; 853 goto retry; 854 } 855 } 856 } 857 858 // Set "can_retry" when it's possible to rewind the file and try with 859 // another "fenc" value. It's false when no other "fenc" to try, reading 860 // stdin or fixed at a specific encoding. 
861 bool can_retry = (*fenc != NUL && !read_stdin && !keep_dest_enc && !read_fifo); 862 863 if (!skip_read) { 864 linerest = 0; 865 filesize = 0; 866 skip_count = lines_to_skip; 867 read_count = lines_to_read; 868 conv_restlen = 0; 869 read_undo_file = (newfile && (flags & READ_KEEP_UNDO) == 0 870 && curbuf->b_ffname != NULL 871 && curbuf->b_p_udf 872 && !filtering 873 && !read_fifo 874 && !read_stdin 875 && !read_buffer); 876 if (read_undo_file) { 877 sha256_start(&sha_ctx); 878 } 879 } 880 881 while (!error && !got_int) { 882 // We allocate as much space for the file as we can get, plus 883 // space for the old line plus room for one terminating NUL. 884 // The amount is limited by the fact that read() only can read 885 // up to max_unsigned characters (and other things). 886 { 887 if (!skip_read) { 888 // Use buffer >= 64K. Add linerest to double the size if the 889 // line gets very long, to avoid a lot of copying. But don't 890 // read more than 1 Mbyte at a time, so we can be interrupted. 891 size = MIN(0x10000 + linerest, 0x100000); 892 } 893 894 // Protect against the argument of lalloc() going negative. 895 // Also split lines that are too long for colnr_T. After this check 896 // passes, we read up to 'size' more bytes. We must ensure that even 897 // after that read, the line length won't exceed MAXCOL - 1 (because 898 // we add 1 for the NUL when casting to colnr_T). If this check fires, 899 // we insert a synthetic newline immediately, so linerest doesn't grow. 
900 if (size < 0 || size + linerest + 1 < 0 || linerest >= MAXCOL - size) { 901 split++; 902 *ptr = NL; // split line by inserting a NL 903 size = 1; 904 } else if (!skip_read) { 905 for (; size >= 10; size /= 2) { 906 new_buffer = verbose_try_malloc((size_t)size + (size_t)linerest + 1); 907 if (new_buffer) { 908 break; 909 } 910 } 911 if (new_buffer == NULL) { 912 error = true; 913 break; 914 } 915 if (linerest) { // copy characters from the previous buffer 916 memmove(new_buffer, ptr - linerest, (size_t)linerest); 917 } 918 xfree(buffer); 919 buffer = new_buffer; 920 ptr = buffer + linerest; 921 line_start = buffer; 922 923 // May need room to translate into. 924 // For iconv() we don't really know the required space, use a 925 // factor ICONV_MULT. 926 // latin1 to utf-8: 1 byte becomes up to 2 bytes 927 // utf-16 to utf-8: 2 bytes become up to 3 bytes, 4 bytes 928 // become up to 4 bytes, size must be multiple of 2 929 // ucs-2 to utf-8: 2 bytes become up to 3 bytes, size must be 930 // multiple of 2 931 // ucs-4 to utf-8: 4 bytes become up to 6 bytes, size must be 932 // multiple of 4 933 real_size = (int)size; 934 if (iconv_fd != (iconv_t)-1) { 935 size = size / ICONV_MULT; 936 } else if (fio_flags & FIO_LATIN1) { 937 size = size / 2; 938 } else if (fio_flags & (FIO_UCS2 | FIO_UTF16)) { 939 size = (size * 2 / 3) & ~1; 940 } else if (fio_flags & FIO_UCS4) { 941 size = (size * 2 / 3) & ~3; 942 } else if (fio_flags == FIO_UCSBOM) { 943 size = size / ICONV_MULT; // worst case 944 } 945 946 if (conv_restlen > 0) { 947 // Insert unconverted bytes from previous line. 948 memmove(ptr, conv_rest, (size_t)conv_restlen); 949 ptr += conv_restlen; 950 size -= conv_restlen; 951 } 952 953 if (read_buffer) { 954 // Read bytes from curbuf. Used for converting text read 955 // from stdin. 
956 if (read_buf_lnum > from) { 957 size = 0; 958 } else { 959 int ni; 960 int tlen = 0; 961 while (true) { 962 p = (uint8_t *)ml_get(read_buf_lnum) + read_buf_col; 963 int n = ml_get_len(read_buf_lnum) - read_buf_col; 964 if (tlen + n + 1 > size) { 965 // Filled up to "size", append partial line. 966 // Change NL to NUL to reverse the effect done 967 // below. 968 n = (int)(size - tlen); 969 for (ni = 0; ni < n; ni++) { 970 if (p[ni] == NL) { 971 ptr[tlen++] = NUL; 972 } else { 973 ptr[tlen++] = (char)p[ni]; 974 } 975 } 976 read_buf_col += n; 977 break; 978 } 979 980 // Append whole line and new-line. Change NL 981 // to NUL to reverse the effect done below. 982 for (ni = 0; ni < n; ni++) { 983 if (p[ni] == NL) { 984 ptr[tlen++] = NUL; 985 } else { 986 ptr[tlen++] = (char)p[ni]; 987 } 988 } 989 ptr[tlen++] = NL; 990 read_buf_col = 0; 991 if (++read_buf_lnum > from) { 992 // When the last line didn't have an 993 // end-of-line don't add it now either. 994 if (!curbuf->b_p_eol) { 995 tlen--; 996 } 997 size = tlen; 998 break; 999 } 1000 } 1001 } 1002 } else { 1003 // Read bytes from the file. 1004 size_t read_size = (size_t)size; 1005 size = read_eintr(fd, ptr, read_size); 1006 } 1007 1008 if (size <= 0) { 1009 if (size < 0) { // read error 1010 error = true; 1011 } else if (conv_restlen > 0) { 1012 // Reached end-of-file but some trailing bytes could 1013 // not be converted. Truncated file? 1014 1015 // When we did a conversion report an error. 
1016 if (fio_flags != 0 || iconv_fd != (iconv_t)-1) { 1017 if (can_retry) { 1018 goto rewind_retry; 1019 } 1020 if (conv_error == 0) { 1021 conv_error = curbuf->b_ml.ml_line_count 1022 - linecnt + 1; 1023 } 1024 } else if (illegal_byte == 0) { 1025 // Remember the first linenr with an illegal byte 1026 illegal_byte = curbuf->b_ml.ml_line_count 1027 - linecnt + 1; 1028 } 1029 if (bad_char_behavior == BAD_DROP) { 1030 *(ptr - conv_restlen) = NUL; 1031 conv_restlen = 0; 1032 } else { 1033 // Replace the trailing bytes with the replacement 1034 // character if we were converting; if we weren't, 1035 // leave the UTF8 checking code to do it, as it 1036 // works slightly differently. 1037 if (bad_char_behavior != BAD_KEEP && (fio_flags != 0 || iconv_fd != (iconv_t)-1)) { 1038 while (conv_restlen > 0) { 1039 *(--ptr) = (char)bad_char_behavior; 1040 conv_restlen--; 1041 } 1042 } 1043 fio_flags = 0; // don't convert this 1044 if (iconv_fd != (iconv_t)-1) { 1045 iconv_close(iconv_fd); 1046 iconv_fd = (iconv_t)-1; 1047 } 1048 } 1049 } 1050 } 1051 } 1052 1053 skip_read = false; 1054 1055 // At start of file: Check for BOM. 1056 // Also check for a BOM for other Unicode encodings, but not after 1057 // converting with 'charconvert' or when a BOM has already been 1058 // found. 1059 if ((filesize == 0) 1060 && (fio_flags == FIO_UCSBOM 1061 || (!curbuf->b_p_bomb 1062 && tmpname == NULL 1063 && (*fenc == 'u' || *fenc == NUL)))) { 1064 char *ccname; 1065 int blen = 0; 1066 1067 // no BOM detection in a short file or in binary mode 1068 if (size < 2 || curbuf->b_p_bin) { 1069 ccname = NULL; 1070 } else { 1071 ccname = check_for_bom(ptr, (int)size, &blen, 1072 fio_flags == FIO_UCSBOM ? 
FIO_ALL : get_fio_flags(fenc)); 1073 } 1074 if (ccname != NULL) { 1075 // Remove BOM from the text 1076 filesize += blen; 1077 size -= blen; 1078 memmove(ptr, ptr + blen, (size_t)size); 1079 if (set_options) { 1080 curbuf->b_p_bomb = true; 1081 curbuf->b_start_bomb = true; 1082 } 1083 } 1084 1085 if (fio_flags == FIO_UCSBOM) { 1086 if (ccname == NULL) { 1087 // No BOM detected: retry with next encoding. 1088 advance_fenc = true; 1089 } else { 1090 // BOM detected: set "fenc" and jump back 1091 if (fenc_alloced) { 1092 xfree(fenc); 1093 } 1094 fenc = ccname; 1095 fenc_alloced = false; 1096 } 1097 // retry reading without getting new bytes or rewinding 1098 skip_read = true; 1099 goto retry; 1100 } 1101 } 1102 1103 // Include not converted bytes. 1104 ptr -= conv_restlen; 1105 size += conv_restlen; 1106 conv_restlen = 0; 1107 // Break here for a read error or end-of-file. 1108 if (size <= 0) { 1109 break; 1110 } 1111 1112 if (iconv_fd != (iconv_t)-1) { 1113 // Attempt conversion of the read bytes to 'encoding' using iconv(). 1114 const char *fromp = ptr; 1115 size_t from_size = (size_t)size; 1116 ptr += size; 1117 char *top = ptr; 1118 size_t to_size = (size_t)(real_size - size); 1119 1120 // If there is conversion error or not enough room try using 1121 // another conversion. Except for when there is no 1122 // alternative (help files). 1123 while ((iconv(iconv_fd, (void *)&fromp, &from_size, 1124 &top, &to_size) 1125 == (size_t)-1 && ICONV_ERRNO != ICONV_EINVAL) 1126 || from_size > CONV_RESTLEN) { 1127 if (can_retry) { 1128 goto rewind_retry; 1129 } 1130 if (conv_error == 0) { 1131 conv_error = readfile_linenr(linecnt, ptr, top); 1132 } 1133 1134 // Deal with a bad byte and continue with the next. 
1135 fromp++; 1136 from_size--; 1137 if (bad_char_behavior == BAD_KEEP) { 1138 *top++ = *(fromp - 1); 1139 to_size--; 1140 } else if (bad_char_behavior != BAD_DROP) { 1141 *top++ = (char)bad_char_behavior; 1142 to_size--; 1143 } 1144 } 1145 1146 if (from_size > 0) { 1147 // Some remaining characters, keep them for the next 1148 // round. 1149 memmove(conv_rest, fromp, from_size); 1150 conv_restlen = (int)from_size; 1151 } 1152 1153 // move the linerest to before the converted characters 1154 line_start = ptr - linerest; 1155 memmove(line_start, buffer, (size_t)linerest); 1156 size = (top - ptr); 1157 } 1158 1159 if (fio_flags != 0) { 1160 unsigned u8c; 1161 char *tail = NULL; 1162 1163 // Convert Unicode or Latin1 to UTF-8. 1164 // Go from end to start through the buffer, because the number 1165 // of bytes may increase. 1166 // "dest" points to after where the UTF-8 bytes go, "p" points 1167 // to after the next character to convert. 1168 char *dest = ptr + real_size; 1169 if (fio_flags == FIO_LATIN1 || fio_flags == FIO_UTF8) { 1170 p = (uint8_t *)ptr + size; 1171 if (fio_flags == FIO_UTF8) { 1172 // Check for a trailing incomplete UTF-8 sequence 1173 tail = ptr + size - 1; 1174 while (tail > ptr && (*tail & 0xc0) == 0x80) { 1175 tail--; 1176 } 1177 if (tail + utf_byte2len(*tail) <= ptr + size) { 1178 tail = NULL; 1179 } else { 1180 p = (uint8_t *)tail; 1181 } 1182 } 1183 } else if (fio_flags & (FIO_UCS2 | FIO_UTF16)) { 1184 // Check for a trailing byte 1185 p = (uint8_t *)ptr + (size & ~1); 1186 if (size & 1) { 1187 tail = (char *)p; 1188 } 1189 if ((fio_flags & FIO_UTF16) && p > (uint8_t *)ptr) { 1190 // Check for a trailing leading word 1191 if (fio_flags & FIO_ENDIAN_L) { 1192 u8c = (unsigned)(*--p) << 8; 1193 u8c += *--p; 1194 } else { 1195 u8c = *--p; 1196 u8c += (unsigned)(*--p) << 8; 1197 } 1198 if (u8c >= 0xd800 && u8c <= 0xdbff) { 1199 tail = (char *)p; 1200 } else { 1201 p += 2; 1202 } 1203 } 1204 } else { // FIO_UCS4 1205 // Check for trailing 1, 2 or 
3 bytes 1206 p = (uint8_t *)ptr + (size & ~3); 1207 if (size & 3) { 1208 tail = (char *)p; 1209 } 1210 } 1211 1212 // If there is a trailing incomplete sequence move it to 1213 // conv_rest[]. 1214 if (tail != NULL) { 1215 conv_restlen = (int)((ptr + size) - tail); 1216 memmove(conv_rest, tail, (size_t)conv_restlen); 1217 size -= conv_restlen; 1218 } 1219 1220 while (p > (uint8_t *)ptr) { 1221 if (fio_flags & FIO_LATIN1) { 1222 u8c = *--p; 1223 } else if (fio_flags & (FIO_UCS2 | FIO_UTF16)) { 1224 if (fio_flags & FIO_ENDIAN_L) { 1225 u8c = (unsigned)(*--p) << 8; 1226 u8c += *--p; 1227 } else { 1228 u8c = *--p; 1229 u8c += (unsigned)(*--p) << 8; 1230 } 1231 if ((fio_flags & FIO_UTF16) 1232 && u8c >= 0xdc00 && u8c <= 0xdfff) { 1233 int u16c; 1234 1235 if (p == (uint8_t *)ptr) { 1236 // Missing leading word. 1237 if (can_retry) { 1238 goto rewind_retry; 1239 } 1240 if (conv_error == 0) { 1241 conv_error = readfile_linenr(linecnt, ptr, (char *)p); 1242 } 1243 if (bad_char_behavior == BAD_DROP) { 1244 continue; 1245 } 1246 if (bad_char_behavior != BAD_KEEP) { 1247 u8c = (unsigned)bad_char_behavior; 1248 } 1249 } 1250 1251 // found second word of double-word, get the first 1252 // word and compute the resulting character 1253 if (fio_flags & FIO_ENDIAN_L) { 1254 u16c = (*--p << 8); 1255 u16c += *--p; 1256 } else { 1257 u16c = *--p; 1258 u16c += (*--p << 8); 1259 } 1260 u8c = 0x10000 + (((unsigned)u16c & 0x3ff) << 10) 1261 + (u8c & 0x3ff); 1262 1263 // Check if the word is indeed a leading word. 
1264 if (u16c < 0xd800 || u16c > 0xdbff) { 1265 if (can_retry) { 1266 goto rewind_retry; 1267 } 1268 if (conv_error == 0) { 1269 conv_error = readfile_linenr(linecnt, ptr, (char *)p); 1270 } 1271 if (bad_char_behavior == BAD_DROP) { 1272 continue; 1273 } 1274 if (bad_char_behavior != BAD_KEEP) { 1275 u8c = (unsigned)bad_char_behavior; 1276 } 1277 } 1278 } 1279 } else if (fio_flags & FIO_UCS4) { 1280 if (fio_flags & FIO_ENDIAN_L) { 1281 u8c = (unsigned)(*--p) << 24; 1282 u8c += (unsigned)(*--p) << 16; 1283 u8c += (unsigned)(*--p) << 8; 1284 u8c += *--p; 1285 } else { // big endian 1286 u8c = *--p; 1287 u8c += (unsigned)(*--p) << 8; 1288 u8c += (unsigned)(*--p) << 16; 1289 u8c += (unsigned)(*--p) << 24; 1290 } 1291 // Replace characters over INT_MAX with Unicode replacement character 1292 if (u8c > INT_MAX) { 1293 u8c = 0xfffd; 1294 } 1295 } else { // UTF-8 1296 if (*--p < 0x80) { 1297 u8c = *p; 1298 } else { 1299 len = utf_head_off(ptr, (char *)p); 1300 p -= len; 1301 u8c = (unsigned)utf_ptr2char((char *)p); 1302 if (len == 0) { 1303 // Not a valid UTF-8 character, retry with 1304 // another fenc when possible, otherwise just 1305 // report the error. 1306 if (can_retry) { 1307 goto rewind_retry; 1308 } 1309 if (conv_error == 0) { 1310 conv_error = readfile_linenr(linecnt, ptr, (char *)p); 1311 } 1312 if (bad_char_behavior == BAD_DROP) { 1313 continue; 1314 } 1315 if (bad_char_behavior != BAD_KEEP) { 1316 u8c = (unsigned)bad_char_behavior; 1317 } 1318 } 1319 } 1320 } 1321 assert(u8c <= INT_MAX); 1322 // produce UTF-8 1323 dest -= utf_char2len((int)u8c); 1324 utf_char2bytes((int)u8c, dest); 1325 } 1326 1327 // move the linerest to before the converted characters 1328 line_start = dest - linerest; 1329 memmove(line_start, buffer, (size_t)linerest); 1330 size = ((ptr + real_size) - dest); 1331 ptr = dest; 1332 } else if (!curbuf->b_p_bin) { 1333 bool incomplete_tail = false; 1334 1335 // Reading UTF-8: Check if the bytes are valid UTF-8. 
1336 for (p = (uint8_t *)ptr;; p++) { 1337 int todo = (int)(((uint8_t *)ptr + size) - p); 1338 1339 if (todo <= 0) { 1340 break; 1341 } 1342 if (*p >= 0x80) { 1343 // A length of 1 means it's an illegal byte. Accept 1344 // an incomplete character at the end though, the next 1345 // read() will get the next bytes, we'll check it 1346 // then. 1347 int l = utf_ptr2len_len((char *)p, todo); 1348 if (l > todo && !incomplete_tail) { 1349 // Avoid retrying with a different encoding when 1350 // a truncated file is more likely, or attempting 1351 // to read the rest of an incomplete sequence when 1352 // we have already done so. 1353 if (p > (uint8_t *)ptr || filesize > 0) { 1354 incomplete_tail = true; 1355 } 1356 // Incomplete byte sequence, move it to conv_rest[] 1357 // and try to read the rest of it, unless we've 1358 // already done so. 1359 if (p > (uint8_t *)ptr) { 1360 conv_restlen = todo; 1361 memmove(conv_rest, p, (size_t)conv_restlen); 1362 size -= conv_restlen; 1363 break; 1364 } 1365 } 1366 if (l == 1 || l > todo) { 1367 // Illegal byte. If we can try another encoding 1368 // do that, unless at EOF where a truncated 1369 // file is more likely than a conversion error. 1370 if (can_retry && !incomplete_tail) { 1371 break; 1372 } 1373 1374 // When we did a conversion report an error. 1375 if (iconv_fd != (iconv_t)-1 && conv_error == 0) { 1376 conv_error = readfile_linenr(linecnt, ptr, (char *)p); 1377 } 1378 1379 // Remember the first linenr with an illegal byte 1380 if (conv_error == 0 && illegal_byte == 0) { 1381 illegal_byte = readfile_linenr(linecnt, ptr, (char *)p); 1382 } 1383 1384 // Drop, keep or replace the bad byte. 
1385 if (bad_char_behavior == BAD_DROP) { 1386 memmove(p, p + 1, (size_t)(todo - 1)); 1387 p--; 1388 size--; 1389 } else if (bad_char_behavior != BAD_KEEP) { 1390 *p = (uint8_t)bad_char_behavior; 1391 } 1392 } else { 1393 p += l - 1; 1394 } 1395 } 1396 } 1397 if (p < (uint8_t *)ptr + size && !incomplete_tail) { 1398 // Detected a UTF-8 error. 1399 rewind_retry: 1400 // Retry reading with another conversion. 1401 if (*p_ccv != NUL && iconv_fd != (iconv_t)-1) { 1402 // iconv() failed, try 'charconvert' 1403 did_iconv = true; 1404 } else { 1405 // use next item from 'fileencodings' 1406 advance_fenc = true; 1407 } 1408 file_rewind = true; 1409 goto retry; 1410 } 1411 } 1412 1413 // count the number of characters (after conversion!) 1414 filesize += size; 1415 1416 // when reading the first part of a file: guess EOL type 1417 if (fileformat == EOL_UNKNOWN) { 1418 // First try finding a NL, for Dos and Unix 1419 if (try_dos || try_unix) { 1420 // Reset the carriage return counter. 1421 if (try_mac) { 1422 try_mac = 1; 1423 } 1424 1425 for (p = (uint8_t *)ptr; p < (uint8_t *)ptr + size; p++) { 1426 if (*p == NL) { 1427 if (!try_unix 1428 || (try_dos && p > (uint8_t *)ptr && p[-1] == CAR)) { 1429 fileformat = EOL_DOS; 1430 } else { 1431 fileformat = EOL_UNIX; 1432 } 1433 break; 1434 } else if (*p == CAR && try_mac) { 1435 try_mac++; 1436 } 1437 } 1438 1439 // Don't give in to EOL_UNIX if EOL_MAC is more likely 1440 if (fileformat == EOL_UNIX && try_mac) { 1441 // Need to reset the counters when retrying fenc. 
1442 try_mac = 1; 1443 try_unix = 1; 1444 for (; p >= (uint8_t *)ptr && *p != CAR; p--) {} 1445 if (p >= (uint8_t *)ptr) { 1446 for (p = (uint8_t *)ptr; p < (uint8_t *)ptr + size; p++) { 1447 if (*p == NL) { 1448 try_unix++; 1449 } else if (*p == CAR) { 1450 try_mac++; 1451 } 1452 } 1453 if (try_mac > try_unix) { 1454 fileformat = EOL_MAC; 1455 } 1456 } 1457 } else if (fileformat == EOL_UNKNOWN && try_mac == 1) { 1458 // Looking for CR but found no end-of-line markers at all: 1459 // use the default format. 1460 fileformat = default_fileformat(); 1461 } 1462 } 1463 1464 // No NL found: may use Mac format 1465 if (fileformat == EOL_UNKNOWN && try_mac) { 1466 fileformat = EOL_MAC; 1467 } 1468 1469 // Still nothing found? Use first format in 'ffs' 1470 if (fileformat == EOL_UNKNOWN) { 1471 fileformat = default_fileformat(); 1472 } 1473 1474 // May set 'p_ff' if editing a new file. 1475 if (set_options) { 1476 set_fileformat(fileformat, OPT_LOCAL); 1477 } 1478 } 1479 } 1480 1481 // This loop is executed once for every character read. 1482 // Keep it fast! 1483 if (fileformat == EOL_MAC) { 1484 ptr--; 1485 while (++ptr, --size >= 0) { 1486 // catch most common case first 1487 if ((c = *ptr) != NUL && c != CAR && c != NL) { 1488 continue; 1489 } 1490 if (c == NUL) { 1491 *ptr = NL; // NULs are replaced by newlines! 1492 } else if (c == NL) { 1493 *ptr = CAR; // NLs are replaced by CRs! 
1494 } else { 1495 if (skip_count == 0) { 1496 *ptr = NUL; // end of line 1497 len = (colnr_T)(ptr - line_start + 1); 1498 if (ml_append(lnum, line_start, len, newfile) == FAIL) { 1499 error = true; 1500 break; 1501 } 1502 if (read_undo_file) { 1503 sha256_update(&sha_ctx, (uint8_t *)line_start, (size_t)len); 1504 } 1505 lnum++; 1506 if (--read_count == 0) { 1507 error = true; // break loop 1508 line_start = ptr; // nothing left to write 1509 break; 1510 } 1511 } else { 1512 skip_count--; 1513 } 1514 line_start = ptr + 1; 1515 } 1516 } 1517 } else { 1518 ptr--; 1519 while (++ptr, --size >= 0) { 1520 if ((c = *ptr) != NUL && c != NL) { // catch most common case 1521 continue; 1522 } 1523 if (c == NUL) { 1524 *ptr = NL; // NULs are replaced by newlines! 1525 } else { 1526 if (skip_count == 0) { 1527 *ptr = NUL; // end of line 1528 len = (colnr_T)(ptr - line_start + 1); 1529 if (fileformat == EOL_DOS) { 1530 if (ptr > line_start && ptr[-1] == CAR) { 1531 // remove CR before NL 1532 ptr[-1] = NUL; 1533 len--; 1534 } else if (ff_error != EOL_DOS) { 1535 // Reading in Dos format, but no CR-LF found! 1536 // When 'fileformats' includes "unix", delete all 1537 // the lines read so far and start all over again. 1538 // Otherwise give an error message later. 
1539 if (try_unix 1540 && !read_stdin 1541 && (read_buffer || vim_lseek(fd, 0, SEEK_SET) == 0)) { 1542 fileformat = EOL_UNIX; 1543 if (set_options) { 1544 set_fileformat(EOL_UNIX, OPT_LOCAL); 1545 } 1546 file_rewind = true; 1547 keep_fileformat = true; 1548 goto retry; 1549 } 1550 ff_error = EOL_DOS; 1551 } 1552 } 1553 if (ml_append(lnum, line_start, len, newfile) == FAIL) { 1554 error = true; 1555 break; 1556 } 1557 if (read_undo_file) { 1558 sha256_update(&sha_ctx, (uint8_t *)line_start, (size_t)len); 1559 } 1560 lnum++; 1561 if (--read_count == 0) { 1562 error = true; // break loop 1563 line_start = ptr; // nothing left to write 1564 break; 1565 } 1566 } else { 1567 skip_count--; 1568 } 1569 line_start = ptr + 1; 1570 } 1571 } 1572 } 1573 linerest = (ptr - line_start); 1574 os_breakcheck(); 1575 } 1576 1577 failed: 1578 // not an error, max. number of lines reached 1579 if (error && read_count == 0) { 1580 error = false; 1581 } 1582 1583 // In Dos format ignore a trailing CTRL-Z, unless 'binary' is set. 1584 // In old days the file length was in sector count and the CTRL-Z the 1585 // marker where the file really ended. Assuming we write it to a file 1586 // system that keeps file length properly the CTRL-Z should be dropped. 1587 // Set the 'endoffile' option so the user can decide what to write later. 1588 // In Unix format the CTRL-Z is just another character. 1589 if (linerest != 0 1590 && !curbuf->b_p_bin 1591 && fileformat == EOL_DOS 1592 && ptr[-1] == Ctrl_Z) { 1593 ptr--; 1594 linerest--; 1595 if (set_options) { 1596 curbuf->b_p_eof = true; 1597 } 1598 } 1599 1600 // If we get EOF in the middle of a line, note the fact and 1601 // complete the line ourselves. 
1602 if (!error 1603 && !got_int 1604 && linerest != 0) { 1605 // remember for when writing 1606 if (set_options) { 1607 curbuf->b_p_eol = false; 1608 } 1609 *ptr = NUL; 1610 len = (colnr_T)(ptr - line_start + 1); 1611 if (ml_append(lnum, line_start, len, newfile) == FAIL) { 1612 error = true; 1613 } else { 1614 if (read_undo_file) { 1615 sha256_update(&sha_ctx, (uint8_t *)line_start, (size_t)len); 1616 } 1617 read_no_eol_lnum = ++lnum; 1618 } 1619 } 1620 1621 if (set_options) { 1622 // Remember the current file format. 1623 save_file_ff(curbuf); 1624 // If editing a new file: set 'fenc' for the current buffer. 1625 // Also for ":read ++edit file". 1626 set_option_direct(kOptFileencoding, CSTR_AS_OPTVAL(fenc), OPT_LOCAL, 0); 1627 } 1628 if (fenc_alloced) { 1629 xfree(fenc); 1630 } 1631 if (iconv_fd != (iconv_t)-1) { 1632 iconv_close(iconv_fd); 1633 } 1634 1635 if (!read_buffer && !read_stdin) { 1636 close(fd); // errors are ignored 1637 } else { 1638 os_set_cloexec(fd); 1639 } 1640 xfree(buffer); 1641 1642 if (read_stdin) { 1643 close(fd); 1644 if (stdin_fd < 0) { 1645 #ifndef MSWIN 1646 // On Unix, use stderr for stdin, makes shell commands work. 1647 vim_ignored = dup(2); 1648 #else 1649 // On Windows, use the console input handle for stdin. 1650 HANDLE conin = CreateFile("CONIN$", GENERIC_READ | GENERIC_WRITE, 1651 FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES)NULL, 1652 OPEN_EXISTING, 0, (HANDLE)NULL); 1653 vim_ignored = _open_osfhandle((intptr_t)conin, _O_RDONLY); 1654 #endif 1655 } 1656 } 1657 1658 if (tmpname != NULL) { 1659 os_remove(tmpname); // delete converted file 1660 xfree(tmpname); 1661 } 1662 no_wait_return--; // may wait for return now 1663 1664 // In recovery mode everything but autocommands is skipped. 
1665 if (!recoverymode) { 1666 // need to delete the last line, which comes from the empty buffer 1667 if (newfile && wasempty && !(curbuf->b_ml.ml_flags & ML_EMPTY)) { 1668 ml_delete(curbuf->b_ml.ml_line_count); 1669 linecnt--; 1670 } 1671 curbuf->deleted_bytes = 0; 1672 curbuf->deleted_bytes2 = 0; 1673 curbuf->deleted_codepoints = 0; 1674 curbuf->deleted_codeunits = 0; 1675 linecnt = curbuf->b_ml.ml_line_count - linecnt; 1676 if (filesize == 0) { 1677 linecnt = 0; 1678 } 1679 if (newfile || read_buffer) { 1680 redraw_curbuf_later(UPD_NOT_VALID); 1681 // After reading the text into the buffer the diff info needs to 1682 // be updated. 1683 diff_invalidate(curbuf); 1684 // All folds in the window are invalid now. Mark them for update 1685 // before triggering autocommands. 1686 foldUpdateAll(curwin); 1687 } else if (linecnt) { // appended at least one line 1688 appended_lines_mark(from, linecnt); 1689 } 1690 1691 if (got_int) { 1692 if (!(flags & READ_DUMMY)) { 1693 filemess(curbuf, sfname, _(e_interr)); 1694 if (newfile) { 1695 curbuf->b_p_ro = true; // must use "w!" 
now 1696 } 1697 } 1698 msg_scroll = msg_save; 1699 check_marks_read(); 1700 retval = OK; // an interrupt isn't really an error 1701 goto theend; 1702 } 1703 1704 if (!filtering && !(flags & READ_DUMMY) && !silent) { 1705 add_quoted_fname(IObuff, IOSIZE, curbuf, sfname); 1706 c = false; 1707 1708 int buflen = (int)strlen(IObuff); 1709 #ifdef UNIX 1710 if (S_ISFIFO(perm)) { // fifo 1711 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[fifo]")); 1712 c = true; 1713 } 1714 if (S_ISSOCK(perm)) { // or socket 1715 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[socket]")); 1716 c = true; 1717 } 1718 # ifdef OPEN_CHR_FILES 1719 if (S_ISCHR(perm)) { // or character special 1720 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[character special]")); 1721 c = true; 1722 } 1723 # endif 1724 #endif 1725 if (curbuf->b_p_ro) { 1726 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), "%s", 1727 shortmess(SHM_RO) ? _("[RO]") : _("[readonly]")); 1728 c = true; 1729 } 1730 if (read_no_eol_lnum) { 1731 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[noeol]")); 1732 c = true; 1733 } 1734 if (ff_error == EOL_DOS) { 1735 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[CR missing]")); 1736 c = true; 1737 } 1738 if (split) { 1739 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[long lines split]")); 1740 c = true; 1741 } 1742 if (notconverted) { 1743 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[NOT converted]")); 1744 c = true; 1745 } else if (converted) { 1746 buflen += snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[converted]")); 1747 c = true; 1748 } 1749 if (conv_error != 0) { 1750 snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), 1751 _("[CONVERSION ERROR in line %" PRId64 "]"), (int64_t)conv_error); 1752 c = true; 1753 } else if (illegal_byte > 0) { 1754 snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), 1755 _("[ILLEGAL BYTE 
in line %" PRId64 "]"), (int64_t)illegal_byte); 1756 c = true; 1757 } else if (error) { 1758 snprintf(IObuff + buflen, (size_t)(IOSIZE - buflen), _("[READ ERRORS]")); 1759 c = true; 1760 } 1761 if (msg_add_fileformat(fileformat)) { 1762 c = true; 1763 } 1764 1765 msg_add_lines(c, linecnt, filesize); 1766 1767 XFREE_CLEAR(keep_msg); 1768 p = NULL; 1769 msg_scrolled_ign = true; 1770 1771 if (!read_stdin && !read_buffer) { 1772 if (msg_col > 0) { 1773 msg_putchar('\r'); // overwrite previous message 1774 } 1775 p = (uint8_t *)msg_trunc(IObuff, false, 0); 1776 } 1777 1778 if (read_stdin || read_buffer || restart_edit != 0 1779 || (msg_scrolled != 0 && !need_wait_return)) { 1780 // Need to repeat the message after redrawing when: 1781 // - When reading from stdin (the screen will be cleared next). 1782 // - When restart_edit is set (otherwise there will be a delay before 1783 // redrawing). 1784 // - When the screen was scrolled but there is no wait-return prompt. 1785 set_keep_msg((char *)p, 0); 1786 } 1787 msg_scrolled_ign = false; 1788 } 1789 1790 // with errors writing the file requires ":w!" 1791 if (newfile && (error 1792 || conv_error != 0 1793 || (illegal_byte > 0 && bad_char_behavior != BAD_KEEP))) { 1794 curbuf->b_p_ro = true; 1795 } 1796 1797 u_clearline(curbuf); // cannot use "U" command after adding lines 1798 1799 // In Ex mode: cursor at last new line. 1800 // Otherwise: cursor at first new line. 1801 if (exmode_active) { 1802 curwin->w_cursor.lnum = from + linecnt; 1803 } else { 1804 curwin->w_cursor.lnum = from + 1; 1805 } 1806 check_cursor_lnum(curwin); 1807 beginline(BL_WHITE | BL_FIX); // on first non-blank 1808 1809 if ((cmdmod.cmod_flags & CMOD_LOCKMARKS) == 0) { 1810 // Set '[ and '] marks to the newly read lines. 
1811 curbuf->b_op_start.lnum = from + 1; 1812 curbuf->b_op_start.col = 0; 1813 curbuf->b_op_end.lnum = from + linecnt; 1814 curbuf->b_op_end.col = 0; 1815 } 1816 } 1817 msg_scroll = msg_save; 1818 1819 // Get the marks before executing autocommands, so they can be used there. 1820 check_marks_read(); 1821 1822 // We remember if the last line of the read didn't have 1823 // an eol even when 'binary' is off, to support turning 'fixeol' off, 1824 // or writing the read again with 'binary' on. The latter is required 1825 // for ":autocmd FileReadPost *.gz set bin|'[,']!gunzip" to work. 1826 curbuf->b_no_eol_lnum = read_no_eol_lnum; 1827 1828 // When reloading a buffer put the cursor at the first line that is 1829 // different. 1830 if (flags & READ_KEEP_UNDO) { 1831 u_find_first_changed(); 1832 } 1833 1834 // When opening a new file locate undo info and read it. 1835 if (read_undo_file) { 1836 uint8_t hash[UNDO_HASH_SIZE]; 1837 1838 sha256_finish(&sha_ctx, hash); 1839 u_read_undo(NULL, hash, fname); 1840 } 1841 1842 if (!read_stdin && !read_fifo && (!read_buffer || sfname != NULL)) { 1843 int m = msg_scroll; 1844 int n = msg_scrolled; 1845 1846 // Save the fileformat now, otherwise the buffer will be considered 1847 // modified if the format/encoding was automatically detected. 1848 if (set_options) { 1849 save_file_ff(curbuf); 1850 } 1851 1852 // The output from the autocommands should not overwrite anything and 1853 // should not be overwritten: Set msg_scroll, restore its value if no 1854 // output was done. 1855 msg_scroll = true; 1856 if (filtering) { 1857 apply_autocmds_exarg(EVENT_FILTERREADPOST, NULL, sfname, 1858 false, curbuf, eap); 1859 } else if (newfile || (read_buffer && sfname != NULL)) { 1860 apply_autocmds_exarg(EVENT_BUFREADPOST, NULL, sfname, 1861 false, curbuf, eap); 1862 if (!curbuf->b_au_did_filetype && *curbuf->b_p_ft != NUL) { 1863 // EVENT_FILETYPE was not triggered but the buffer already has a 1864 // filetype. 
Trigger EVENT_FILETYPE using the existing filetype. 1865 apply_autocmds(EVENT_FILETYPE, curbuf->b_p_ft, curbuf->b_fname, true, curbuf); 1866 } 1867 } else { 1868 apply_autocmds_exarg(EVENT_FILEREADPOST, sfname, sfname, 1869 false, NULL, eap); 1870 } 1871 if (msg_scrolled == n) { 1872 msg_scroll = m; 1873 } 1874 if (aborting()) { // autocmds may abort script processing 1875 return FAIL; 1876 } 1877 } 1878 1879 if (!(recoverymode && error)) { 1880 retval = OK; 1881 } 1882 1883 theend: 1884 if (curbuf->b_ml.ml_mfp != NULL 1885 && curbuf->b_ml.ml_mfp->mf_dirty == MF_DIRTY_YES_NOSYNC) { 1886 // OK to sync the swap file now 1887 curbuf->b_ml.ml_mfp->mf_dirty = MF_DIRTY_YES; 1888 } 1889 1890 return retval; 1891 } 1892 1893 #ifdef OPEN_CHR_FILES 1894 /// Returns true if the file name argument is of the form "/dev/fd/\d\+", 1895 /// which is the name of files used for process substitution output by 1896 /// some shells on some operating systems, e.g., bash on SunOS. 1897 /// Do not accept "/dev/fd/[012]", opening these may hang Vim. 1898 /// 1899 /// @param fname file name to check 1900 bool is_dev_fd_file(char *fname) 1901 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 1902 { 1903 return strncmp(fname, "/dev/fd/", 8) == 0 1904 && ascii_isdigit((uint8_t)fname[8]) 1905 && *skipdigits(fname + 9) == NUL 1906 && (fname[9] != NUL 1907 || (fname[8] != '0' && fname[8] != '1' && fname[8] != '2')); 1908 } 1909 #endif 1910 1911 /// From the current line count and characters read after that, estimate the 1912 /// line number where we are now. 1913 /// Used for error messages that include a line number. 
1914 /// 1915 /// @param linecnt line count before reading more bytes 1916 /// @param p start of more bytes read 1917 /// @param endp end of more bytes read 1918 static linenr_T readfile_linenr(linenr_T linecnt, char *p, const char *endp) 1919 { 1920 linenr_T lnum = curbuf->b_ml.ml_line_count - linecnt + 1; 1921 for (char *s = p; s < endp; s++) { 1922 if (*s == '\n') { 1923 lnum++; 1924 } 1925 } 1926 return lnum; 1927 } 1928 1929 /// Fill "*eap" to force the 'fileencoding', 'fileformat' and 'binary' to be 1930 /// equal to the buffer "buf". Used for calling readfile(). 1931 void prep_exarg(exarg_T *eap, const buf_T *buf) 1932 FUNC_ATTR_NONNULL_ALL 1933 { 1934 const size_t cmd_len = 15 + strlen(buf->b_p_fenc); 1935 eap->cmd = xmalloc(cmd_len); 1936 1937 snprintf(eap->cmd, cmd_len, "e ++enc=%s", buf->b_p_fenc); 1938 eap->force_enc = 8; 1939 eap->bad_char = buf->b_bad_char; 1940 eap->force_ff = (unsigned char)(*buf->b_p_ff); 1941 1942 eap->force_bin = buf->b_p_bin ? FORCE_BIN : FORCE_NOBIN; 1943 eap->read_edit = false; 1944 eap->forceit = false; 1945 } 1946 1947 /// Set default or forced 'fileformat' and 'binary'. 1948 void set_file_options(bool set_options, exarg_T *eap) 1949 { 1950 // set default 'fileformat' 1951 if (set_options) { 1952 if (eap != NULL && eap->force_ff != 0) { 1953 set_fileformat(get_fileformat_force(curbuf, eap), OPT_LOCAL); 1954 } else if (*p_ffs != NUL) { 1955 set_fileformat(default_fileformat(), OPT_LOCAL); 1956 } 1957 } 1958 1959 // set or reset 'binary' 1960 if (eap != NULL && eap->force_bin != 0) { 1961 int oldval = curbuf->b_p_bin; 1962 1963 curbuf->b_p_bin = (eap->force_bin == FORCE_BIN); 1964 set_options_bin(oldval, curbuf->b_p_bin, OPT_LOCAL); 1965 } 1966 } 1967 1968 /// Set forced 'fileencoding'. 
1969 void set_forced_fenc(exarg_T *eap) 1970 { 1971 if (eap->force_enc == 0) { 1972 return; 1973 } 1974 1975 char *fenc = enc_canonize(eap->cmd + eap->force_enc); 1976 set_option_direct(kOptFileencoding, CSTR_AS_OPTVAL(fenc), OPT_LOCAL, 0); 1977 xfree(fenc); 1978 } 1979 1980 /// Find next fileencoding to use from 'fileencodings'. 1981 /// "pp" points to fenc_next. It's advanced to the next item. 1982 /// When there are no more items, an empty string is returned and *pp is set to 1983 /// NULL. 1984 /// When *pp is not set to NULL, the result is in allocated memory and "alloced" 1985 /// is set to true. 1986 static char *next_fenc(char **pp, bool *alloced) 1987 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_NONNULL_RET 1988 { 1989 char *r; 1990 1991 *alloced = false; 1992 if (**pp == NUL) { 1993 *pp = NULL; 1994 return ""; 1995 } 1996 char *p = vim_strchr(*pp, ','); 1997 if (p == NULL) { 1998 r = enc_canonize(*pp); 1999 *pp += strlen(*pp); 2000 } else { 2001 r = xmemdupz(*pp, (size_t)(p - *pp)); 2002 *pp = p + 1; 2003 p = enc_canonize(r); 2004 xfree(r); 2005 r = p; 2006 } 2007 *alloced = true; 2008 return r; 2009 } 2010 2011 /// Convert a file with the 'charconvert' expression. 2012 /// This closes the file which is to be read, converts it and opens the 2013 /// resulting file for reading. 2014 /// 2015 /// @param fname name of input file 2016 /// @param fenc converted from 2017 /// @param fdp in/out: file descriptor of file 2018 /// 2019 /// @return name of the resulting converted file (the caller should delete it after reading it). 2020 /// Returns NULL if the conversion failed ("*fdp" is not set) . 
2021 static char *readfile_charconvert(char *fname, char *fenc, int *fdp) 2022 { 2023 char *errmsg = NULL; 2024 2025 char *tmpname = vim_tempname(); 2026 if (tmpname == NULL) { 2027 errmsg = _("Can't find temp file for conversion"); 2028 } else { 2029 close(*fdp); // close the input file, ignore errors 2030 *fdp = -1; 2031 if (eval_charconvert(fenc, "utf-8", 2032 fname, tmpname) == FAIL) { 2033 errmsg = _("Conversion with 'charconvert' failed"); 2034 } 2035 if (errmsg == NULL && (*fdp = os_open(tmpname, O_RDONLY, 0)) < 0) { 2036 errmsg = _("can't read output of 'charconvert'"); 2037 } 2038 } 2039 2040 if (errmsg != NULL) { 2041 // Don't use emsg(), it breaks mappings, the retry with 2042 // another type of conversion might still work. 2043 msg(errmsg, 0); 2044 if (tmpname != NULL) { 2045 os_remove(tmpname); // delete converted file 2046 XFREE_CLEAR(tmpname); 2047 } 2048 } 2049 2050 // If the input file is closed, open it (caller should check for error). 2051 if (*fdp < 0) { 2052 *fdp = os_open(fname, O_RDONLY, 0); 2053 } 2054 2055 return tmpname; 2056 } 2057 2058 /// Set the name of the current buffer. Use when the buffer doesn't have a 2059 /// name and a ":r" or ":w" command with a file name is used. 2060 int set_rw_fname(char *fname, char *sfname) 2061 { 2062 buf_T *buf = curbuf; 2063 2064 // It's like the unnamed buffer is deleted.... 2065 if (curbuf->b_p_bl) { 2066 apply_autocmds(EVENT_BUFDELETE, NULL, NULL, false, curbuf); 2067 } 2068 apply_autocmds(EVENT_BUFWIPEOUT, NULL, NULL, false, curbuf); 2069 if (aborting()) { // autocmds may abort script processing 2070 return FAIL; 2071 } 2072 if (curbuf != buf) { 2073 // We are in another buffer now, don't do the renaming. 
2074 emsg(_(e_auchangedbuf)); 2075 return FAIL; 2076 } 2077 2078 if (setfname(curbuf, fname, sfname, false) == OK) { 2079 curbuf->b_flags |= BF_NOTEDITED; 2080 } 2081 2082 // ....and a new named one is created 2083 apply_autocmds(EVENT_BUFNEW, NULL, NULL, false, curbuf); 2084 if (curbuf->b_p_bl) { 2085 apply_autocmds(EVENT_BUFADD, NULL, NULL, false, curbuf); 2086 } 2087 if (aborting()) { // autocmds may abort script processing 2088 return FAIL; 2089 } 2090 2091 // Do filetype detection now if 'filetype' is empty. 2092 if (*curbuf->b_p_ft == NUL) { 2093 if (augroup_exists("filetypedetect")) { 2094 do_doautocmd("filetypedetect BufRead", false, NULL); 2095 } 2096 do_modelines(0); 2097 } 2098 2099 return OK; 2100 } 2101 2102 /// Put file name into the specified buffer with quotes 2103 /// 2104 /// Replaces home directory at the start with `~`. 2105 /// 2106 /// @param[out] ret_buf Buffer to save results to. 2107 /// @param[in] buf_len ret_buf length. 2108 /// @param[in] buf buf_T file name is coming from. 2109 /// @param[in] fname File name to write. 2110 void add_quoted_fname(char *const ret_buf, const size_t buf_len, const buf_T *const buf, 2111 const char *fname) 2112 FUNC_ATTR_NONNULL_ARG(1) 2113 { 2114 if (fname == NULL) { 2115 fname = "-stdin-"; 2116 } 2117 ret_buf[0] = '"'; 2118 home_replace(buf, fname, ret_buf + 1, buf_len - 4, true); 2119 xstrlcat(ret_buf, "\" ", buf_len); 2120 } 2121 2122 /// Append message for text mode to IObuff. 2123 /// 2124 /// @param eol_type line ending type 2125 /// 2126 /// @return true if something was appended. 
2127 bool msg_add_fileformat(int eol_type) 2128 { 2129 #ifndef USE_CRNL 2130 if (eol_type == EOL_DOS) { 2131 xstrlcat(IObuff, _("[dos]"), IOSIZE); 2132 return true; 2133 } 2134 #endif 2135 if (eol_type == EOL_MAC) { 2136 xstrlcat(IObuff, _("[mac]"), IOSIZE); 2137 return true; 2138 } 2139 #ifdef USE_CRNL 2140 if (eol_type == EOL_UNIX) { 2141 xstrlcat(IObuff, _("[unix]"), IOSIZE); 2142 return true; 2143 } 2144 #endif 2145 return false; 2146 } 2147 2148 /// Append line and character count to IObuff. 2149 void msg_add_lines(int insert_space, linenr_T lnum, off_T nchars) 2150 { 2151 size_t len = strlen(IObuff); 2152 2153 if (shortmess(SHM_LINES)) { 2154 snprintf(IObuff + len, IOSIZE - len, 2155 _("%s%" PRId64 "L, %" PRId64 "B"), // l10n: L as in line, B as in byte 2156 insert_space ? " " : "", (int64_t)lnum, (int64_t)nchars); 2157 } else { 2158 len += (size_t)snprintf(IObuff + len, IOSIZE - len, 2159 NGETTEXT("%s%" PRId64 " line, ", "%s%" PRId64 " lines, ", lnum), 2160 insert_space ? " " : "", (int64_t)lnum); 2161 snprintf(IObuff + len, IOSIZE - len, 2162 NGETTEXT("%" PRId64 " byte", "%" PRId64 " bytes", nchars), 2163 (int64_t)nchars); 2164 } 2165 } 2166 2167 bool time_differs(const FileInfo *file_info, int64_t mtime, int64_t mtime_ns) 2168 FUNC_ATTR_CONST 2169 { 2170 #if defined(__linux__) || defined(MSWIN) 2171 return file_info->stat.st_mtim.tv_nsec != mtime_ns 2172 // On a FAT filesystem, esp. under Linux, there are only 5 bits to store 2173 // the seconds. Since the roundoff is done when flushing the inode, the 2174 // time may change unexpectedly by one second!!! 2175 || file_info->stat.st_mtim.tv_sec - mtime > 1 2176 || mtime - file_info->stat.st_mtim.tv_sec > 1; 2177 #else 2178 return file_info->stat.st_mtim.tv_nsec != mtime_ns 2179 || file_info->stat.st_mtim.tv_sec != mtime; 2180 #endif 2181 } 2182 2183 /// Return true if file encoding "fenc" requires conversion from or to 2184 /// 'encoding'. 
2185 /// 2186 /// @param fenc file encoding to check 2187 /// 2188 /// @return true if conversion is required 2189 bool need_conversion(const char *fenc) 2190 FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT 2191 { 2192 bool same_encoding; 2193 int fenc_flags; 2194 2195 if (*fenc == NUL || strcmp(p_enc, fenc) == 0) { 2196 same_encoding = true; 2197 fenc_flags = 0; 2198 } else { 2199 // Ignore difference between "ansi" and "latin1", "ucs-4" and 2200 // "ucs-4be", etc. 2201 int enc_flags = get_fio_flags(p_enc); 2202 fenc_flags = get_fio_flags(fenc); 2203 same_encoding = (enc_flags != 0 && fenc_flags == enc_flags); 2204 } 2205 if (same_encoding) { 2206 // Specified file encoding matches UTF-8. 2207 return false; 2208 } 2209 2210 // Encodings differ. However, conversion is not needed when 'enc' is any 2211 // Unicode encoding and the file is UTF-8. 2212 return !(fenc_flags == FIO_UTF8); 2213 } 2214 2215 /// Return the FIO_ flags needed for the internal conversion if 'name' was 2216 /// unicode or latin1, otherwise 0. If "name" is an empty string, 2217 /// use 'encoding'. 2218 /// 2219 /// @param name string to check for encoding 2220 int get_fio_flags(const char *name) 2221 { 2222 if (*name == NUL) { 2223 name = p_enc; 2224 } 2225 int prop = enc_canon_props(name); 2226 if (prop & ENC_UNICODE) { 2227 if (prop & ENC_2BYTE) { 2228 if (prop & ENC_ENDIAN_L) { 2229 return FIO_UCS2 | FIO_ENDIAN_L; 2230 } 2231 return FIO_UCS2; 2232 } 2233 if (prop & ENC_4BYTE) { 2234 if (prop & ENC_ENDIAN_L) { 2235 return FIO_UCS4 | FIO_ENDIAN_L; 2236 } 2237 return FIO_UCS4; 2238 } 2239 if (prop & ENC_2WORD) { 2240 if (prop & ENC_ENDIAN_L) { 2241 return FIO_UTF16 | FIO_ENDIAN_L; 2242 } 2243 return FIO_UTF16; 2244 } 2245 return FIO_UTF8; 2246 } 2247 if (prop & ENC_LATIN1) { 2248 return FIO_LATIN1; 2249 } 2250 // must be ENC_DBCS, requires iconv() 2251 return 0; 2252 } 2253 2254 /// Check for a Unicode BOM (Byte Order Mark) at the start of p[size]. 2255 /// "size" must be at least 2. 
2256 /// 2257 /// @return the name of the encoding and set "*lenp" to the length or, 2258 /// NULL when no BOM found. 2259 static char *check_for_bom(const char *p_in, int size, int *lenp, int flags) 2260 { 2261 const uint8_t *p = (const uint8_t *)p_in; 2262 char *name = NULL; 2263 int len = 2; 2264 2265 if (p[0] == 0xef && p[1] == 0xbb && size >= 3 && p[2] == 0xbf 2266 && (flags == FIO_ALL || flags == FIO_UTF8 || flags == 0)) { 2267 name = "utf-8"; // EF BB BF 2268 len = 3; 2269 } else if (p[0] == 0xff && p[1] == 0xfe) { 2270 if (size >= 4 && p[2] == 0 && p[3] == 0 2271 && (flags == FIO_ALL || flags == (FIO_UCS4 | FIO_ENDIAN_L))) { 2272 name = "ucs-4le"; // FF FE 00 00 2273 len = 4; 2274 } else if (flags == (FIO_UCS2 | FIO_ENDIAN_L)) { 2275 name = "ucs-2le"; // FF FE 2276 } else if (flags == FIO_ALL 2277 || flags == (FIO_UTF16 | FIO_ENDIAN_L)) { 2278 // utf-16le is preferred, it also works for ucs-2le text 2279 name = "utf-16le"; // FF FE 2280 } 2281 } else if (p[0] == 0xfe && p[1] == 0xff 2282 && (flags == FIO_ALL || flags == FIO_UCS2 || flags == 2283 FIO_UTF16)) { 2284 // Default to utf-16, it works also for ucs-2 text. 2285 if (flags == FIO_UCS2) { 2286 name = "ucs-2"; // FE FF 2287 } else { 2288 name = "utf-16"; // FE FF 2289 } 2290 } else if (size >= 4 && p[0] == 0 && p[1] == 0 && p[2] == 0xfe 2291 && p[3] == 0xff && (flags == FIO_ALL || flags == FIO_UCS4)) { 2292 name = "ucs-4"; // 00 00 FE FF 2293 len = 4; 2294 } 2295 2296 *lenp = len; 2297 return name; 2298 } 2299 2300 /// Shorten filename of a buffer. 2301 /// 2302 /// @param force when true: Use full path from now on for files currently being 2303 /// edited, both for file name and swap file name. Try to shorten the file 2304 /// names a bit, if safe to do so. 2305 /// when false: Only try to shorten absolute file names. 2306 /// 2307 /// For buffers that have buftype "nofile" or "scratch": never change the file 2308 /// name. 
2309 void shorten_buf_fname(buf_T *buf, char *dirname, int force) 2310 { 2311 if (buf->b_fname != NULL 2312 && !bt_nofilename(buf) 2313 && !path_with_url(buf->b_fname) 2314 && (force 2315 || buf->b_sfname == NULL 2316 || path_is_absolute(buf->b_sfname))) { 2317 if (buf->b_sfname != buf->b_ffname) { 2318 XFREE_CLEAR(buf->b_sfname); 2319 } 2320 char *p = path_shorten_fname(buf->b_ffname, dirname); 2321 if (p != NULL) { 2322 buf->b_sfname = xstrdup(p); 2323 buf->b_fname = buf->b_sfname; 2324 } 2325 if (p == NULL) { 2326 buf->b_fname = buf->b_ffname; 2327 } 2328 } 2329 } 2330 2331 /// Shorten filenames for all buffers. 2332 void shorten_fnames(int force) 2333 { 2334 char dirname[MAXPATHL]; 2335 2336 os_dirname(dirname, MAXPATHL); 2337 FOR_ALL_BUFFERS(buf) { 2338 shorten_buf_fname(buf, dirname, force); 2339 2340 // Always make the swap file name a full path, a "nofile" buffer may 2341 // also have a swap file. 2342 mf_fullname(buf->b_ml.ml_mfp); 2343 } 2344 status_redraw_all(); 2345 redraw_tabline = true; 2346 } 2347 2348 /// Get new filename ended by given extension. 2349 /// 2350 /// @param fname The original filename. 2351 /// If NULL, use current directory name and ext to 2352 /// compute new filename. 2353 /// @param ext The extension to add to the filename. 2354 /// 4 chars max if prefixed with a dot, 3 otherwise. 2355 /// @param prepend_dot If true, prefix ext with a dot. 2356 /// Does nothing if ext already starts with a dot, or 2357 /// if fname is NULL. 2358 /// 2359 /// @return [allocated] - A new filename, made up from: 2360 /// * fname + ext, if fname not NULL. 2361 /// * current dir + ext, if fname is NULL. 2362 /// Result is guaranteed to: 2363 /// * be ended by <ext>. 2364 /// * have a basename with at most BASENAMELEN chars: 2365 /// original basename is truncated if necessary. 2366 /// * be different than original: basename chars are 2367 /// replaced by "_" if necessary. 
If that can't be done 2368 /// because truncated value of original filename was 2369 /// made of all underscores, replace first "_" by "v". 2370 /// - NULL, if fname is NULL and there was a problem trying 2371 /// to get current directory. 2372 char *modname(const char *fname, const char *ext, bool prepend_dot) 2373 FUNC_ATTR_NONNULL_ARG(2) 2374 { 2375 char *retval; 2376 size_t fnamelen; 2377 size_t extlen = strlen(ext); 2378 2379 // If there is no file name we must get the name of the current directory 2380 // (we need the full path in case :cd is used). 2381 if (fname == NULL || *fname == NUL) { 2382 retval = xmalloc(MAXPATHL + extlen + 3); // +3 for PATHSEP, "_" (Win), NUL 2383 if (os_dirname(retval, MAXPATHL) == FAIL 2384 || strlen(retval) == 0) { 2385 xfree(retval); 2386 return NULL; 2387 } 2388 add_pathsep(retval); 2389 fnamelen = strlen(retval); 2390 prepend_dot = false; // nothing to prepend a dot to 2391 } else { 2392 fnamelen = strlen(fname); 2393 retval = xmalloc(fnamelen + extlen + 3); 2394 strcpy(retval, fname); // NOLINT(runtime/printf) 2395 } 2396 2397 // Search backwards until we hit a '/', '\' or ':'. 2398 // Then truncate what is after the '/', '\' or ':' to BASENAMELEN characters. 2399 char *ptr = NULL; 2400 for (ptr = retval + fnamelen; ptr > retval; MB_PTR_BACK(retval, ptr)) { 2401 if (vim_ispathsep(*ptr)) { 2402 ptr++; 2403 break; 2404 } 2405 } 2406 2407 // the file name has at most BASENAMELEN characters. 2408 size_t ptrlen = fnamelen - (size_t)(ptr - retval); 2409 if (ptrlen > (unsigned)BASENAMELEN) { 2410 ptrlen = BASENAMELEN; 2411 ptr[ptrlen] = NUL; 2412 } 2413 2414 char *s = ptr + ptrlen; 2415 2416 // Append the extension. 2417 // ext can start with '.' and cannot exceed 3 more characters. 2418 strcpy(s, ext); // NOLINT(runtime/printf) 2419 2420 char *e; 2421 // Prepend the dot if needed. 
2422 if (prepend_dot && *(e = path_tail(retval)) != '.') { 2423 memmove(e + 1, e, ((fnamelen + extlen) - (size_t)(e - retval)) + 1); // +1 for NUL 2424 *e = '.'; 2425 } 2426 2427 // Check that, after appending the extension, the file name is really 2428 // different. 2429 if (fname != NULL && strcmp(fname, retval) == 0) { 2430 // we search for a character that can be replaced by '_' 2431 while (--s >= ptr) { 2432 if (*s != '_') { 2433 *s = '_'; 2434 break; 2435 } 2436 } 2437 if (s < ptr) { // fname was "________.<ext>", how tricky! 2438 *ptr = 'v'; 2439 } 2440 } 2441 return retval; 2442 } 2443 2444 /// Like fgets(), but if the file line is too long, it is truncated and the 2445 /// rest of the line is thrown away. 2446 /// 2447 /// @param[out] buf buffer to fill 2448 /// @param size size of the buffer 2449 /// @param fp file to read from 2450 /// 2451 /// @return true for EOF or error 2452 bool vim_fgets(char *buf, int size, FILE *fp) 2453 FUNC_ATTR_NONNULL_ALL 2454 { 2455 char *retval; 2456 2457 assert(size > 0); 2458 buf[size - 2] = NUL; 2459 2460 do { 2461 errno = 0; 2462 retval = fgets(buf, size, fp); 2463 } while (retval == NULL && errno == EINTR && ferror(fp)); 2464 2465 if (buf[size - 2] != NUL && buf[size - 2] != '\n') { 2466 char tbuf[200]; 2467 2468 buf[size - 1] = NUL; // Truncate the line. 2469 2470 // Now throw away the rest of the line: 2471 do { 2472 tbuf[sizeof(tbuf) - 2] = NUL; 2473 errno = 0; 2474 retval = fgets(tbuf, sizeof(tbuf), fp); 2475 if (retval == NULL && (feof(fp) || errno != EINTR)) { 2476 break; 2477 } 2478 } while (tbuf[sizeof(tbuf) - 2] != NUL && tbuf[sizeof(tbuf) - 2] != '\n'); 2479 } 2480 return retval == NULL; 2481 } 2482 2483 /// Read 2 bytes from "fd" and turn them into an int, MSB first. 2484 /// 2485 /// @return -1 when encountering EOF. 
int get2c(FILE *fd)
{
  const int hi = getc(fd);
  if (hi == EOF) {
    return -1;
  }
  const int lo = getc(fd);
  if (lo == EOF) {
    return -1;
  }
  return (hi << 8) + lo;
}

/// Read 3 bytes from "fd" and turn them into an int, MSB first.
///
/// @return -1 when encountering EOF.
int get3c(FILE *fd)
{
  int n = 0;

  for (int i = 0; i < 3; i++) {
    const int c = getc(fd);
    if (c == EOF) {
      return -1;
    }
    n = (n << 8) + c;
  }
  return n;
}

/// Read 4 bytes from "fd" and turn them into an int, MSB first.
///
/// @return -1 when encountering EOF.
int get4c(FILE *fd)
{
  // Use unsigned rather than int otherwise result is undefined
  // when left-shift sets the MSB.
  unsigned n = 0;

  for (int i = 0; i < 4; i++) {
    const int c = getc(fd);
    if (c == EOF) {
      return -1;
    }
    n = (n << 8) + (unsigned)c;
  }
  return (int)n;
}

/// Read 8 bytes from `fd` and turn them into a time_t, MSB first.
///
/// @return -1 when encountering EOF.
time_t get8ctime(FILE *fd)
{
  // Accumulate in an unsigned 64-bit value: left-shifting a signed time_t
  // is undefined once a bit reaches the sign position (same reasoning as in
  // get4c()).
  uint64_t n = 0;

  for (int i = 0; i < 8; i++) {
    const int c = getc(fd);
    if (c == EOF) {
      return -1;
    }
    n = (n << 8) | (uint64_t)c;
  }
  return (time_t)n;
}

/// Reads a string of length "cnt" from "fd" into allocated memory.
///
/// @return pointer to the string or NULL when unable to read that many bytes.
2572 char *read_string(FILE *fd, size_t cnt) 2573 { 2574 char *str = xmallocz(cnt); 2575 for (size_t i = 0; i < cnt; i++) { 2576 int c = getc(fd); 2577 if (c == EOF) { 2578 xfree(str); 2579 return NULL; 2580 } 2581 str[i] = (char)c; 2582 } 2583 return str; 2584 } 2585 2586 /// Writes a number to file "fd", most significant bit first, in "len" bytes. 2587 /// 2588 /// @return false in case of an error. 2589 bool put_bytes(FILE *fd, uintmax_t number, size_t len) 2590 { 2591 assert(len > 0); 2592 for (size_t i = len - 1; i < len; i--) { 2593 if (putc((int)(number >> (i * 8)), fd) == EOF) { 2594 return false; 2595 } 2596 } 2597 return true; 2598 } 2599 2600 /// Writes time_t to file "fd" in 8 bytes. 2601 /// 2602 /// @return FAIL when the write failed. 2603 int put_time(FILE *fd, time_t time_) 2604 { 2605 uint8_t buf[8]; 2606 time_to_bytes(time_, buf); 2607 return fwrite(buf, sizeof(uint8_t), ARRAY_SIZE(buf), fd) == 1 ? OK : FAIL; 2608 } 2609 2610 static int rename_with_tmp(const char *const from, const char *const to) 2611 { 2612 // Find a name that doesn't exist and is in the same directory. 2613 // Rename "from" to "tempname" and then rename "tempname" to "to". 2614 if (strlen(from) >= MAXPATHL - 5) { 2615 return -1; 2616 } 2617 2618 char tempname[MAXPATHL + 1]; 2619 STRCPY(tempname, from); 2620 for (int n = 123; n < 99999; n++) { 2621 char *tail = path_tail(tempname); 2622 snprintf(tail, (size_t)((MAXPATHL + 1) - (tail - tempname)), "%d", n); 2623 2624 if (!os_path_exists(tempname)) { 2625 if (os_rename(from, tempname) == OK) { 2626 if (os_rename(tempname, to) == OK) { 2627 return 0; 2628 } 2629 // Strange, the second step failed. Try moving the 2630 // file back and return failure. 2631 os_rename(tempname, from); 2632 return -1; 2633 } 2634 // If it fails for one temp name it will most likely fail 2635 // for any temp name, give up. 
2636 return -1; 2637 } 2638 } 2639 return -1; 2640 } 2641 2642 /// os_rename() only works if both files are on the same file system, this 2643 /// function will (attempts to?) copy the file across if rename fails -- webb 2644 /// 2645 /// @return -1 for failure, 0 for success 2646 int vim_rename(const char *from, const char *to) 2647 FUNC_ATTR_NONNULL_ALL 2648 { 2649 bool use_tmp_file = false; 2650 2651 // When the names are identical, there is nothing to do. When they refer 2652 // to the same file (ignoring case and slash/backslash differences) but 2653 // the file name differs we need to go through a temp file. 2654 if (path_fnamecmp(from, to) == 0) { 2655 if (p_fic && (strcmp(path_tail(from), path_tail(to)) != 0)) { 2656 use_tmp_file = true; 2657 } else { 2658 return 0; 2659 } 2660 } 2661 2662 // Fail if the "from" file doesn't exist. Avoids that "to" is deleted. 2663 FileInfo from_info; 2664 if (!os_fileinfo(from, &from_info)) { 2665 return -1; 2666 } 2667 2668 // It's possible for the source and destination to be the same file. 2669 // This happens when "from" and "to" differ in case and are on a FAT32 2670 // filesystem. In that case go through a temp file name. 2671 FileInfo to_info; 2672 if (os_fileinfo(to, &to_info) && os_fileinfo_id_equal(&from_info, &to_info)) { 2673 use_tmp_file = true; 2674 } 2675 2676 if (use_tmp_file) { 2677 return rename_with_tmp(from, to); 2678 } 2679 2680 // Delete the "to" file, this is required on some systems to make the 2681 // os_rename() work, on other systems it makes sure that we don't have 2682 // two files when the os_rename() fails. 2683 2684 os_remove(to); 2685 2686 // First try a normal rename, return if it works. 2687 if (os_rename(from, to) == OK) { 2688 return 0; 2689 } 2690 2691 // Rename() failed, try copying the file. 
2692 int ret = vim_copyfile(from, to); 2693 if (ret != OK) { 2694 return -1; 2695 } 2696 2697 if (os_fileinfo(from, &from_info)) { 2698 os_remove(from); 2699 } 2700 2701 return 0; 2702 } 2703 2704 /// Create the new file with same permissions as the original. 2705 /// Return FAIL for failure, OK for success. 2706 int vim_copyfile(const char *from, const char *to) 2707 { 2708 char *errmsg = NULL; 2709 2710 #ifdef HAVE_READLINK 2711 FileInfo from_info; 2712 if (os_fileinfo_link(from, &from_info) && S_ISLNK(from_info.stat.st_mode)) { 2713 int ret = -1; 2714 2715 char linkbuf[MAXPATHL + 1]; 2716 ssize_t len = readlink(from, linkbuf, MAXPATHL); 2717 if (len > 0) { 2718 linkbuf[len] = NUL; 2719 2720 // Create link 2721 ret = symlink(linkbuf, to); 2722 } 2723 2724 return ret == 0 ? OK : FAIL; 2725 } 2726 #endif 2727 2728 // For systems that support ACL: get the ACL from the original file. 2729 vim_acl_T acl = os_get_acl(from); 2730 2731 if (os_copy(from, to, UV_FS_COPYFILE_EXCL) != 0) { 2732 os_free_acl(acl); 2733 return FAIL; 2734 } 2735 2736 os_set_acl(to, acl); 2737 os_free_acl(acl); 2738 if (errmsg != NULL) { 2739 semsg(errmsg, to); 2740 return FAIL; 2741 } 2742 return OK; 2743 } 2744 2745 static bool already_warned = false; 2746 2747 /// Check if any not hidden buffer has been changed. 2748 /// Postpone the check if there are characters in the stuff buffer, a global 2749 /// command is being executed, a mapping is being executed or an autocommand is 2750 /// busy. 2751 /// 2752 /// @param focus called for GUI focus event 2753 /// 2754 /// @return true if some message was written (screen should be redrawn and cursor positioned). 2755 int check_timestamps(int focus) 2756 { 2757 // Don't check timestamps while system() or another low-level function may 2758 // cause us to lose and gain focus. 2759 if (no_check_timestamps > 0) { 2760 return false; 2761 } 2762 2763 // Avoid doing a check twice. 
The OK/Reload dialog can cause a focus 2764 // event and we would keep on checking if the file is steadily growing. 2765 // Do check again after typing something. 2766 if (focus && did_check_timestamps) { 2767 need_check_timestamps = true; 2768 return false; 2769 } 2770 2771 int didit = 0; 2772 2773 if (!stuff_empty() || global_busy || !typebuf_typed() 2774 || autocmd_busy || curbuf->b_ro_locked > 0 2775 || allbuf_lock > 0) { 2776 need_check_timestamps = true; // check later 2777 } else { 2778 no_wait_return++; 2779 did_check_timestamps = true; 2780 already_warned = false; 2781 FOR_ALL_BUFFERS(buf) { 2782 // Only check buffers in a window. 2783 if (buf->b_nwindows > 0) { 2784 bufref_T bufref; 2785 set_bufref(&bufref, buf); 2786 const int n = buf_check_timestamp(buf); 2787 didit = MAX(didit, n); 2788 if (n > 0 && !bufref_valid(&bufref)) { 2789 // Autocommands have removed the buffer, start at the first one again. 2790 buf = firstbuf; 2791 continue; 2792 } 2793 } 2794 } 2795 no_wait_return--; 2796 need_check_timestamps = false; 2797 if (need_wait_return && didit == 2) { 2798 // make sure msg isn't overwritten 2799 msg_puts("\n"); 2800 ui_flush(); 2801 } 2802 } 2803 return didit; 2804 } 2805 2806 /// Move all the lines from buffer "frombuf" to buffer "tobuf". 2807 /// 2808 /// @return OK or FAIL. 2809 /// When FAIL "tobuf" is incomplete and/or "frombuf" is not empty. 2810 static int move_lines(buf_T *frombuf, buf_T *tobuf) 2811 { 2812 buf_T *tbuf = curbuf; 2813 int retval = OK; 2814 2815 // Copy the lines in "frombuf" to "tobuf". 2816 curbuf = tobuf; 2817 for (linenr_T lnum = 1; lnum <= frombuf->b_ml.ml_line_count; lnum++) { 2818 char *p = xmemdupz(ml_get_buf(frombuf, lnum), (size_t)ml_get_buf_len(frombuf, lnum)); 2819 if (ml_append(lnum - 1, p, 0, false) == FAIL) { 2820 xfree(p); 2821 retval = FAIL; 2822 break; 2823 } 2824 xfree(p); 2825 } 2826 2827 // Delete all the lines in "frombuf". 
2828 if (retval != FAIL) { 2829 curbuf = frombuf; 2830 for (linenr_T lnum = curbuf->b_ml.ml_line_count; lnum > 0; lnum--) { 2831 if (ml_delete(lnum) == FAIL) { 2832 // Oops! We could try putting back the saved lines, but that 2833 // might fail again... 2834 retval = FAIL; 2835 break; 2836 } 2837 } 2838 } 2839 2840 curbuf = tbuf; 2841 return retval; 2842 } 2843 2844 /// Check if buffer "buf" has been changed. 2845 /// Also check if the file for a new buffer unexpectedly appeared. 2846 /// 2847 /// @return 1 if a changed buffer was found or, 2848 /// 2 if a message has been displayed or, 2849 /// 0 otherwise. 2850 int buf_check_timestamp(buf_T *buf) 2851 FUNC_ATTR_NONNULL_ALL 2852 { 2853 int retval = 0; 2854 char *mesg = NULL; 2855 char *mesg2 = ""; 2856 bool helpmesg = false; 2857 2858 enum { 2859 RELOAD_NONE, 2860 RELOAD_NORMAL, 2861 RELOAD_DETECT, 2862 } reload = RELOAD_NONE; 2863 2864 bool can_reload = false; 2865 uint64_t orig_size = buf->b_orig_size; 2866 int orig_mode = buf->b_orig_mode; 2867 static bool busy = false; 2868 2869 bufref_T bufref; 2870 set_bufref(&bufref, buf); 2871 2872 // If its a terminal, there is no file name, the buffer is not loaded, 2873 // 'buftype' is set, we are in the middle of a save or being called 2874 // recursively: ignore this buffer. 
2875 if (buf->terminal 2876 || buf->b_ffname == NULL 2877 || buf->b_ml.ml_mfp == NULL 2878 || !bt_normal(buf) 2879 || buf->b_saving 2880 || busy) { 2881 return 0; 2882 } 2883 2884 FileInfo file_info; 2885 bool file_info_ok; 2886 if (!(buf->b_flags & BF_NOTEDITED) 2887 && buf->b_mtime != 0 2888 && (!(file_info_ok = os_fileinfo(buf->b_ffname, &file_info)) 2889 || time_differs(&file_info, buf->b_mtime, buf->b_mtime_ns) 2890 || (int)file_info.stat.st_mode != buf->b_orig_mode)) { 2891 const int64_t prev_b_mtime = buf->b_mtime; 2892 2893 retval = 1; 2894 2895 // set b_mtime to stop further warnings (e.g., when executing 2896 // FileChangedShell autocmd) 2897 if (!file_info_ok) { 2898 // Check the file again later to see if it re-appears. 2899 buf->b_mtime = -1; 2900 buf->b_orig_size = 0; 2901 buf->b_orig_mode = 0; 2902 } else { 2903 buf_store_file_info(buf, &file_info); 2904 } 2905 2906 if (os_isdir(buf->b_fname)) { 2907 // Don't do anything for a directory. Might contain the file explorer. 2908 } else if ((buf->b_p_ar >= 0 ? buf->b_p_ar : p_ar) 2909 && !bufIsChanged(buf) && file_info_ok) { 2910 // If 'autoread' is set, the buffer has no changes and the file still 2911 // exists, reload the buffer. Use the buffer-local option value if it 2912 // was set, the global option value otherwise. 
2913 reload = RELOAD_NORMAL; 2914 } else { 2915 char *reason; 2916 size_t reasonlen; 2917 2918 if (!file_info_ok) { 2919 reason = "deleted"; 2920 reasonlen = STRLEN_LITERAL("deleted"); 2921 } else if (bufIsChanged(buf)) { 2922 reason = "conflict"; 2923 reasonlen = STRLEN_LITERAL("conflict"); 2924 } else if (orig_size != buf->b_orig_size || buf_contents_changed(buf)) { 2925 reason = "changed"; 2926 reasonlen = STRLEN_LITERAL("changed"); 2927 } else if (orig_mode != buf->b_orig_mode) { 2928 reason = "mode"; 2929 reasonlen = STRLEN_LITERAL("mode"); 2930 } else { 2931 reason = "time"; 2932 reasonlen = STRLEN_LITERAL("time"); 2933 } 2934 2935 // Only give the warning if there are no FileChangedShell 2936 // autocommands. 2937 // Avoid being called recursively by setting "busy". 2938 busy = true; 2939 set_vim_var_string(VV_FCS_REASON, reason, (int)reasonlen); 2940 set_vim_var_string(VV_FCS_CHOICE, "", 0); 2941 allbuf_lock++; 2942 bool n = apply_autocmds(EVENT_FILECHANGEDSHELL, buf->b_fname, buf->b_fname, false, buf); 2943 allbuf_lock--; 2944 busy = false; 2945 if (n) { 2946 if (!bufref_valid(&bufref)) { 2947 emsg(_("E246: FileChangedShell autocommand deleted buffer")); 2948 } 2949 char *s = get_vim_var_str(VV_FCS_CHOICE); 2950 if (strcmp(s, "reload") == 0 && *reason != 'd') { 2951 reload = RELOAD_NORMAL; 2952 } else if (strcmp(s, "edit") == 0) { 2953 reload = RELOAD_DETECT; 2954 } else if (strcmp(s, "ask") == 0) { 2955 n = false; 2956 } else { 2957 return 2; 2958 } 2959 } 2960 if (!n) { 2961 if (*reason == 'd') { 2962 // Only give the message once. 2963 if (prev_b_mtime != -1) { 2964 mesg = _("E211: File \"%s\" no longer available"); 2965 } 2966 } else { 2967 helpmesg = true; 2968 can_reload = true; 2969 2970 // Check if the file contents really changed to avoid 2971 // giving a warning when only the timestamp was set (e.g., 2972 // checked out of CVS). Always warn when the buffer was 2973 // changed. 
2974 if (reason[2] == 'n') { 2975 mesg = 2976 _("W12: Warning: File \"%s\" has changed and the buffer was changed in Vim as well"); 2977 mesg2 = _("See \":help W12\" for more info."); 2978 } else if (reason[1] == 'h') { 2979 mesg = _("W11: Warning: File \"%s\" has changed since editing started"); 2980 mesg2 = _("See \":help W11\" for more info."); 2981 } else if (*reason == 'm') { 2982 mesg = _("W16: Warning: Mode of file \"%s\" has changed since editing started"); 2983 mesg2 = _("See \":help W16\" for more info."); 2984 } else { 2985 // Only timestamp changed, store it to avoid a warning 2986 // in check_mtime() later. 2987 buf->b_mtime_read = buf->b_mtime; 2988 buf->b_mtime_read_ns = buf->b_mtime_ns; 2989 } 2990 } 2991 } 2992 } 2993 } else if ((buf->b_flags & BF_NEW) && !(buf->b_flags & BF_NEW_W) 2994 && os_path_exists(buf->b_ffname)) { 2995 retval = 1; 2996 mesg = _("W13: Warning: File \"%s\" has been created after editing started"); 2997 buf->b_flags |= BF_NEW_W; 2998 can_reload = true; 2999 } 3000 3001 if (mesg != NULL) { 3002 char *path = home_replace_save(buf, buf->b_fname); 3003 if (!helpmesg) { 3004 mesg2 = ""; 3005 } 3006 // +2 for either '\n' or "; " and +1 for NUL 3007 const size_t tbufsize = strlen(path) + strlen(mesg) + strlen(mesg2) + 3; 3008 char *const tbuf = xmalloc(tbufsize); 3009 int tbuflen = snprintf(tbuf, tbufsize, mesg, path); 3010 // Set warningmsg here, before the unimportant and output-specific 3011 // mesg2 has been appended. 
3012 set_vim_var_string(VV_WARNINGMSG, tbuf, tbuflen); 3013 if (can_reload) { 3014 if (*mesg2 != NUL) { 3015 snprintf(tbuf + tbuflen, tbufsize - (size_t)tbuflen, "\n%s", mesg2); 3016 } 3017 switch (do_dialog(VIM_WARNING, _("Warning"), tbuf, 3018 _("&OK\n&Load File\nLoad File &and Options"), 3019 1, NULL, true)) { 3020 case 2: 3021 reload = RELOAD_NORMAL; 3022 break; 3023 case 3: 3024 reload = RELOAD_DETECT; 3025 break; 3026 } 3027 } else if (State > MODE_NORMAL_BUSY || (State & MODE_CMDLINE) || already_warned) { 3028 if (*mesg2 != NUL) { 3029 snprintf(tbuf + tbuflen, tbufsize - (size_t)tbuflen, "; %s", mesg2); 3030 } 3031 emsg(tbuf); 3032 retval = 2; 3033 } else { 3034 if (!autocmd_busy) { 3035 msg_start(); 3036 msg_puts_hl(tbuf, HLF_E, true); 3037 if (*mesg2 != NUL) { 3038 msg_puts_hl(mesg2, HLF_W, true); 3039 } 3040 msg_clr_eos(); 3041 msg_end(); 3042 if (emsg_silent == 0 && !in_assert_fails && !ui_has(kUIMessages)) { 3043 msg_delay(1004, true); // give the user some time to think about it 3044 redraw_cmdline = false; // don't redraw and erase the message 3045 } 3046 } 3047 already_warned = true; 3048 } 3049 3050 xfree(tbuf); 3051 xfree(path); 3052 } 3053 3054 if (reload != RELOAD_NONE) { 3055 // Reload the buffer. 3056 buf_reload(buf, orig_mode, reload == RELOAD_DETECT); 3057 if (bufref_valid(&bufref) && buf->b_p_udf && buf->b_ffname != NULL) { 3058 uint8_t hash[UNDO_HASH_SIZE]; 3059 3060 // Any existing undo file is unusable, write it now. 3061 u_compute_hash(buf, hash); 3062 u_write_undo(NULL, false, buf, hash); 3063 } 3064 } 3065 3066 // Trigger FileChangedShell when the file was changed in any way. 3067 if (bufref_valid(&bufref) && retval != 0) { 3068 apply_autocmds(EVENT_FILECHANGEDSHELLPOST, buf->b_fname, buf->b_fname, false, buf); 3069 } 3070 return retval; 3071 } 3072 3073 /// Reload a buffer that is already loaded. 3074 /// Used when the file was changed outside of Vim. 3075 /// "orig_mode" is buf->b_orig_mode before the need for reloading was detected. 
/// buf->b_orig_mode may have been reset already.
///
/// @param buf             buffer whose text is read again from buf->b_ffname
/// @param orig_mode       file mode that is compared with curbuf->b_orig_mode
///                        after the reload, to decide whether the previous
///                        'readonly' value is kept
/// @param reload_options  when true readfile() gets an all-zero exarg_T,
///                        otherwise the exarg_T is filled in by prep_exarg()
void buf_reload(buf_T *buf, int orig_mode, bool reload_options)
{
  exarg_T ea;
  int old_ro = buf->b_p_ro;  // OR-ed back into b_p_ro at the end if the mode is unchanged
  buf_T *savebuf;
  bufref_T bufref;
  int saved = OK;
  aco_save_T aco;
  int flags = READ_NEW;

  // Set curwin/curbuf for "buf" and save some things.
  aucmd_prepbuf(&aco, buf);

  // Unless reload_options is set, we only want to read the text from the
  // file, not reset the syntax highlighting, clear marks, diff status, etc.
  // Force the fileformat and encoding to be the same.
  if (reload_options) {
    CLEAR_FIELD(ea);
  } else {
    prep_exarg(&ea, buf);
  }

  // Remembered here, restored after the text was replaced.
  pos_T old_cursor = curwin->w_cursor;
  linenr_T old_topline = curwin->w_topline;

  // NOTE(review): p_ur is presumably the 'undoreload' option value - confirm.
  if (p_ur < 0 || curbuf->b_ml.ml_line_count <= p_ur) {
    // Save all the text, so that the reload can be undone.
    // Sync first so that this is a separate undo-able action.
    u_sync(false);
    saved = u_savecommon(curbuf, 0, curbuf->b_ml.ml_line_count + 1, 0, true);
    flags |= READ_KEEP_UNDO;
  }

  // To behave like when a new file is edited (matters for
  // BufReadPost autocommands) we first need to delete the current
  // buffer contents.  But if reading the file fails we should keep
  // the old contents.  Can't use memory only, the file might be
  // too big.  Use a hidden buffer to move the buffer contents to.
  if (buf_is_empty(curbuf) || saved == FAIL) {
    savebuf = NULL;
  } else {
    // Allocate a buffer without putting it in the buffer list.
    savebuf = buflist_new(NULL, NULL, 1, BLN_DUMMY);
    set_bufref(&bufref, savebuf);
    if (savebuf != NULL && buf == curbuf) {
      // Open the memline.
      // ml_open() is called on curbuf, so make "savebuf" current for the
      // duration of the call and switch back right after.
      curbuf = savebuf;
      curwin->w_buffer = savebuf;
      saved = ml_open(curbuf);
      curbuf = buf;
      curwin->w_buffer = buf;
    }
    if (savebuf == NULL || saved == FAIL || buf != curbuf
        || move_lines(buf, savebuf) == FAIL) {
      semsg(_("E462: Could not prepare for reloading \"%s\""),
            buf->b_fname);
      saved = FAIL;
    }
  }

  if (saved == OK) {
    curbuf->b_flags |= BF_CHECK_RO;       // check for RO again
    curbuf->b_keep_filetype = true;       // don't detect 'filetype'
    if (readfile(buf->b_ffname, buf->b_fname, 0, 0,
                 (linenr_T)MAXLNUM, &ea, flags, shortmess(SHM_FILEINFO)) != OK) {
      if (!aborting()) {
        semsg(_("E321: Could not reload \"%s\""), buf->b_fname);
      }
      if (savebuf != NULL && bufref_valid(&bufref) && buf == curbuf) {
        // Put the text back from the save buffer.  First
        // delete any lines that readfile() added.
        while (!buf_is_empty(curbuf)) {
          if (ml_delete(buf->b_ml.ml_line_count) == FAIL) {
            break;
          }
        }
        move_lines(savebuf, buf);
      }
    } else if (buf == curbuf) {  // "buf" still valid.
      // Mark the buffer as unmodified and free undo info.
      unchanged(buf, true, true);
      if ((flags & READ_KEEP_UNDO) == 0) {
        u_clearallandblockfree(buf);
      } else {
        // Mark all undo states as changed.
        u_unchanged(curbuf);
      }
      buf_updates_unload(curbuf, true);
      curbuf->b_mod_set = true;
    }
  }
  // NOTE(review): ea.cmd is NULL after CLEAR_FIELD(); presumably it is
  // allocated by prep_exarg() - confirm.
  xfree(ea.cmd);

  // The save buffer is only a temporary holder, drop it if it still exists.
  if (savebuf != NULL && bufref_valid(&bufref)) {
    wipe_buffer(savebuf, false);
  }

  // Invalidate diff info if necessary.
  diff_invalidate(curbuf);

  // Restore the topline and cursor position and check it (lines may
  // have been removed).
  curwin->w_topline = MIN(old_topline, curbuf->b_ml.ml_line_count);
  curwin->w_cursor = old_cursor;
  check_cursor(curwin);
  update_topline(curwin);
  curbuf->b_keep_filetype = false;

  // Update folds unless they are defined manually.
  FOR_ALL_TAB_WINDOWS(tp, wp) {
    if (wp->w_buffer == curwin->w_buffer
        && !foldmethodIsManual(wp)) {
      foldUpdateAll(wp);
    }
  }

  // If the mode didn't change and 'readonly' was set, keep the old
  // value; the user probably used the ":view" command.  But don't
  // reset it, might have had a read error.
  if (orig_mode == curbuf->b_orig_mode) {
    curbuf->b_p_ro |= old_ro;
  }

  // Modelines must override settings done by autocommands.
  do_modelines(0);

  // restore curwin/curbuf and a few other things
  aucmd_restbuf(&aco);
  // Careful: autocommands may have made "buf" invalid!
}

/// Copies the modification time (seconds and nanoseconds), the size and the
/// mode found in "file_info" into the corresponding b_mtime, b_mtime_ns,
/// b_orig_size and b_orig_mode fields of "buf".
///
/// @param buf        buffer that receives the values
/// @param file_info  file information to take the values from
void buf_store_file_info(buf_T *buf, FileInfo *file_info)
  FUNC_ATTR_NONNULL_ALL
{
  buf->b_mtime = file_info->stat.st_mtim.tv_sec;
  buf->b_mtime_ns = file_info->stat.st_mtim.tv_nsec;
  buf->b_orig_size = os_fileinfo_size(file_info);
  buf->b_orig_mode = (int)file_info->stat.st_mode;
}

/// Adjust the line with missing eol, used for the next write.
/// Used for do_filter(), when the input lines for the filter are deleted.
///
/// @param offset  amount added to curbuf->b_no_eol_lnum; nothing happens when
///                that field is zero
void write_lnum_adjust(linenr_T offset)
{
  if (curbuf->b_no_eol_lnum != 0) {     // only if there is a missing eol
    curbuf->b_no_eol_lnum += offset;
  }
}

#if defined(BACKSLASH_IN_FILENAME)
/// Convert all backslashes in fname to forward slashes in-place,
/// unless when it looks like a URL.
///
/// @param fname  NUL-terminated name, modified in place
void forward_slash(char *fname)
{
  if (path_with_url(fname)) {
    return;
  }
  for (char *p = fname; *p != NUL; p++) {
    if (*p == '\\') {
      *p = '/';
    }
  }
}
#endif

/// Path to Nvim's own temp dir.
Ends in a slash. 3243 static char *vim_tempdir = NULL; 3244 #ifdef HAVE_DIRFD_AND_FLOCK 3245 DIR *vim_tempdir_dp = NULL; ///< File descriptor of temp dir 3246 #endif 3247 3248 /// Creates a directory for private use by this instance of Nvim, trying each of 3249 /// `TEMP_DIR_NAMES` until one succeeds. 3250 /// 3251 /// Only done once, the same directory is used for all temp files. 3252 /// This method avoids security problems because of symlink attacks et al. 3253 /// It's also a bit faster, because we only need to check for an existing 3254 /// file when creating the directory and not for each temp file. 3255 static void vim_mktempdir(void) 3256 { 3257 static const char *temp_dirs[] = TEMP_DIR_NAMES; // Try each of these until one succeeds. 3258 char tmp[TEMP_FILE_PATH_MAXLEN]; 3259 char path[TEMP_FILE_PATH_MAXLEN]; 3260 char user[40] = { 0 }; 3261 3262 os_get_username(user, sizeof(user)); 3263 // Usernames may contain slashes! #19240 3264 memchrsub(user, '/', '_', sizeof(user)); 3265 memchrsub(user, '\\', '_', sizeof(user)); 3266 3267 // Make sure the umask doesn't remove the executable bit. 3268 // "repl" has been reported to use "0177". 3269 mode_t umask_save = umask(0077); 3270 for (size_t i = 0; i < ARRAY_SIZE(temp_dirs); i++) { 3271 // Expand environment variables, leave room for "/tmp/nvim.<user>/XXXXXX/999999999". 3272 size_t tmplen = expand_env((char *)temp_dirs[i], tmp, TEMP_FILE_PATH_MAXLEN - 64); 3273 if (!os_isdir(tmp)) { 3274 if (strequal("$TMPDIR", temp_dirs[i])) { 3275 if (!os_env_exists("TMPDIR", true)) { 3276 DLOG("$TMPDIR is unset"); 3277 } else { 3278 WLOG("$TMPDIR tempdir not a directory (or does not exist): \"%s\"", tmp); 3279 } 3280 } 3281 continue; 3282 } 3283 3284 // "/tmp/" exists, now try to create "/tmp/nvim.<user>/". 
3285 if (!after_pathsep(tmp, tmp + tmplen)) { 3286 tmplen += (size_t)vim_snprintf(tmp + tmplen, sizeof(tmp) - tmplen, PATHSEPSTR); 3287 assert(tmplen < sizeof(tmp)); 3288 } 3289 tmplen += (size_t)vim_snprintf(tmp + tmplen, sizeof(tmp) - tmplen, 3290 "nvim.%s", user); 3291 assert(tmplen < sizeof(tmp)); 3292 os_mkdir(tmp, 0700); // Always create, to avoid a race. 3293 bool owned = os_file_owned(tmp); 3294 bool isdir = os_isdir(tmp); 3295 #ifdef UNIX 3296 int perm = os_getperm(tmp); // XDG_RUNTIME_DIR must be owned by the user, mode 0700. 3297 bool valid = isdir && owned && 0700 == (perm & 0777); 3298 #else 3299 bool valid = isdir && owned; // TODO(justinmk): Windows ACL? 3300 #endif 3301 if (valid) { 3302 if (!after_pathsep(tmp, tmp + tmplen)) { 3303 tmplen += (size_t)vim_snprintf(tmp + tmplen, sizeof(tmp) - tmplen, PATHSEPSTR); 3304 assert(tmplen < sizeof(tmp)); 3305 } 3306 } else { 3307 if (!owned) { 3308 ELOG("tempdir root not owned by current user (%s): %s", user, tmp); 3309 } else if (!isdir) { 3310 ELOG("tempdir root not a directory: %s", tmp); 3311 } 3312 #ifdef UNIX 3313 if (0700 != (perm & 0777)) { 3314 ELOG("tempdir root has invalid permissions (%o): %s", perm, tmp); 3315 } 3316 #endif 3317 // If our "root" tempdir is invalid or fails, proceed without "<user>/". 3318 // Else user1 could break user2 by creating "/tmp/nvim.user2/". 3319 tmplen -= strlen(user); 3320 tmp[tmplen] = NUL; 3321 } 3322 3323 // Now try to create "/tmp/nvim.<user>/XXXXXX". 3324 // "XXXXXX" is mkdtemp "template", will be replaced with random alphanumeric chars. 3325 tmplen += (size_t)vim_snprintf(tmp + tmplen, sizeof(tmp) - tmplen, "XXXXXX"); 3326 assert(tmplen < sizeof(tmp)); 3327 (void)tmplen; 3328 int r = os_mkdtemp(tmp, path); 3329 if (r != 0) { 3330 WLOG("tempdir create failed: %s: %s", os_strerror(r), tmp); 3331 continue; 3332 } 3333 3334 if (vim_settempdir(path)) { 3335 // Successfully created and set temporary directory so stop trying. 
3336 break; 3337 } 3338 // Couldn't set `vim_tempdir` to `path` so remove created directory. 3339 os_rmdir(path); 3340 } 3341 umask(umask_save); 3342 } 3343 3344 /// Core part of "readdir()" function. 3345 /// Retrieve the list of files/directories of "path" into "gap". 3346 /// 3347 /// @return OK for success, FAIL for failure. 3348 int readdir_core(garray_T *gap, const char *path, void *context, CheckItem checkitem) 3349 FUNC_ATTR_NONNULL_ARG(1, 2) 3350 { 3351 ga_init(gap, (int)sizeof(char *), 20); 3352 3353 Directory dir; 3354 if (!os_scandir(&dir, path)) { 3355 smsg(0, _(e_notopen), path); 3356 return FAIL; 3357 } 3358 3359 while (true) { 3360 const char *p = os_scandir_next(&dir); 3361 if (p == NULL) { 3362 break; 3363 } 3364 3365 bool ignore = (p[0] == '.' && (p[1] == NUL || (p[1] == '.' && p[2] == NUL))); 3366 if (!ignore && checkitem != NULL) { 3367 varnumber_T r = checkitem(context, p); 3368 if (r < 0) { 3369 break; 3370 } 3371 if (r == 0) { 3372 ignore = true; 3373 } 3374 } 3375 3376 if (!ignore) { 3377 ga_grow(gap, 1); 3378 ((char **)gap->ga_data)[gap->ga_len++] = xstrdup(p); 3379 } 3380 } 3381 3382 os_closedir(&dir); 3383 3384 if (gap->ga_len > 0) { 3385 sort_strings(gap->ga_data, gap->ga_len); 3386 } 3387 3388 return OK; 3389 } 3390 3391 /// Delete "name" and everything in it, recursively. 3392 /// 3393 /// @param name The path which should be deleted. 3394 /// 3395 /// @return 0 for success, -1 if some file was not deleted. 
3396 int delete_recursive(const char *name) 3397 FUNC_ATTR_NONNULL_ALL 3398 { 3399 int result = 0; 3400 3401 if (os_isrealdir(name)) { 3402 char *exp = xstrdup(name); 3403 garray_T ga; 3404 if (readdir_core(&ga, exp, NULL, NULL) == OK) { 3405 int len = snprintf(NameBuff, MAXPATHL, "%s/", exp); 3406 3407 for (int i = 0; i < ga.ga_len; i++) { 3408 snprintf(NameBuff + len, MAXPATHL - (size_t)len, "%s", ((char **)ga.ga_data)[i]); 3409 if (delete_recursive(NameBuff) != 0) { 3410 // Remember the failure but continue deleting any further 3411 // entries. 3412 result = -1; 3413 } 3414 } 3415 ga_clear_strings(&ga); 3416 if (os_rmdir(exp) != 0) { 3417 result = -1; 3418 } 3419 } else { 3420 result = -1; 3421 } 3422 xfree(exp); 3423 } else { 3424 // Delete symlink only. 3425 result = os_remove(name) == 0 ? 0 : -1; 3426 } 3427 3428 return result; 3429 } 3430 3431 #ifdef HAVE_DIRFD_AND_FLOCK 3432 /// Open temporary directory and take file lock to prevent 3433 /// to be auto-cleaned. 3434 static void vim_opentempdir(void) 3435 { 3436 if (vim_tempdir_dp != NULL) { 3437 return; 3438 } 3439 3440 DIR *dp = opendir(vim_tempdir); 3441 if (dp == NULL) { 3442 return; 3443 } 3444 3445 vim_tempdir_dp = dp; 3446 flock(dirfd(vim_tempdir_dp), LOCK_SH); 3447 } 3448 3449 /// Close temporary directory - it automatically release file lock. 3450 static void vim_closetempdir(void) 3451 { 3452 if (vim_tempdir_dp == NULL) { 3453 return; 3454 } 3455 3456 closedir(vim_tempdir_dp); 3457 vim_tempdir_dp = NULL; 3458 } 3459 #endif 3460 3461 /// Delete the temp directory and all files it contains. 3462 void vim_deltempdir(void) 3463 { 3464 if (vim_tempdir == NULL) { 3465 return; 3466 } 3467 3468 #ifdef HAVE_DIRFD_AND_FLOCK 3469 vim_closetempdir(); 3470 #endif 3471 // remove the trailing path separator 3472 path_tail(vim_tempdir)[-1] = NUL; 3473 delete_recursive(vim_tempdir); 3474 XFREE_CLEAR(vim_tempdir); 3475 } 3476 3477 /// Gets path to Nvim's own temp dir (ending with slash). 
3478 /// 3479 /// Creates the directory on the first call. 3480 char *vim_gettempdir(void) 3481 { 3482 static int notfound = 0; 3483 if (vim_tempdir == NULL || !os_isdir(vim_tempdir)) { 3484 if (vim_tempdir != NULL) { 3485 notfound++; 3486 if (notfound == 1) { 3487 ELOG("tempdir disappeared (antivirus or broken cleanup job?): %s", vim_tempdir); 3488 } 3489 if (notfound > 1) { 3490 msg_schedule_semsg("E5431: tempdir disappeared (%d times)", notfound); 3491 } 3492 XFREE_CLEAR(vim_tempdir); 3493 } 3494 vim_mktempdir(); 3495 } 3496 return vim_tempdir; 3497 } 3498 3499 /// Sets Nvim's own temporary directory name to `tempdir`. This directory must 3500 /// already exist. Expands the name to a full path and put it in `vim_tempdir`. 3501 /// This avoids that using `:cd` would confuse us. 3502 /// 3503 /// @param tempdir must be no longer than MAXPATHL. 3504 /// 3505 /// @return false if we run out of memory. 3506 static bool vim_settempdir(char *tempdir) 3507 { 3508 char *buf = verbose_try_malloc(MAXPATHL + 2); 3509 if (buf == NULL) { 3510 return false; 3511 } 3512 3513 vim_FullName(tempdir, buf, MAXPATHL, false); 3514 size_t buflen = strlen(buf); 3515 if (!after_pathsep(buf, buf + buflen)) { 3516 strcpy(buf + buflen, PATHSEPSTR); // NOLINT(runtime/printf) 3517 buflen += STRLEN_LITERAL(PATHSEPSTR); 3518 } 3519 vim_tempdir = xmemdupz(buf, buflen); 3520 #ifdef HAVE_DIRFD_AND_FLOCK 3521 vim_opentempdir(); 3522 #endif 3523 xfree(buf); 3524 return true; 3525 } 3526 3527 /// Return a unique name that can be used for a temp file. 3528 /// 3529 /// @note The temp file is NOT created. 3530 /// 3531 /// @return pointer to the temp file name or NULL if Nvim can't create 3532 /// temporary directory for its own temporary files. 3533 char *vim_tempname(void) 3534 { 3535 // Temp filename counter. 
3536 static uint64_t temp_count; 3537 3538 char *tempdir = vim_gettempdir(); 3539 if (!tempdir) { 3540 return NULL; 3541 } 3542 3543 // There is no need to check if the file exists, because we own the directory 3544 // and nobody else creates a file in it. 3545 char templ[TEMP_FILE_PATH_MAXLEN]; 3546 int itmplen = snprintf(templ, TEMP_FILE_PATH_MAXLEN, "%s%" PRIu64, tempdir, temp_count++); 3547 return xmemdupz(templ, (size_t)itmplen); 3548 } 3549 3550 /// Tries matching a filename with a "pattern" ("prog" is NULL), or use the 3551 /// precompiled regprog "prog" ("pattern" is NULL). That avoids calling 3552 /// vim_regcomp() often. 3553 /// 3554 /// Used for autocommands and 'wildignore'. 3555 /// 3556 /// @param pattern pattern to match with 3557 /// @param prog pre-compiled regprog or NULL 3558 /// @param fname full path of the file name 3559 /// @param sfname short file name or NULL 3560 /// @param tail tail of the path 3561 /// @param allow_dirs Allow matching with dir 3562 /// 3563 /// @return true if there is a match, false otherwise 3564 bool match_file_pat(char *pattern, regprog_T **prog, char *fname, char *sfname, char *tail, 3565 int allow_dirs) 3566 { 3567 regmatch_T regmatch; 3568 bool result = false; 3569 3570 regmatch.rm_ic = p_fic; // ignore case if 'fileignorecase' is set 3571 regmatch.regprog = prog != NULL ? *prog : vim_regcomp(pattern, RE_MAGIC); 3572 3573 // Try for a match with the pattern with: 3574 // 1. the full file name, when the pattern has a '/'. 3575 // 2. the short file name, when the pattern has a '/'. 3576 // 3. the tail of the file name, when the pattern has no '/'. 
3577 if (regmatch.regprog != NULL 3578 && ((allow_dirs 3579 && (vim_regexec(®match, fname, 0) 3580 || (sfname != NULL 3581 && vim_regexec(®match, sfname, 0)))) 3582 || (!allow_dirs && vim_regexec(®match, tail, 0)))) { 3583 result = true; 3584 } 3585 3586 if (prog != NULL) { 3587 *prog = regmatch.regprog; 3588 } else { 3589 vim_regfree(regmatch.regprog); 3590 } 3591 return result; 3592 } 3593 3594 /// Check if a file matches with a pattern in "list". 3595 /// "list" is a comma-separated list of patterns, like 'wildignore'. 3596 /// "sfname" is the short file name or NULL, "ffname" the long file name. 3597 /// 3598 /// @param list list of patterns to match 3599 /// @param sfname short file name 3600 /// @param ffname full file name 3601 /// 3602 /// @return true if there was a match 3603 bool match_file_list(char *list, char *sfname, char *ffname) 3604 FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(1, 3) 3605 { 3606 char *tail = path_tail(sfname); 3607 3608 // try all patterns in 'wildignore' 3609 char *p = list; 3610 while (*p) { 3611 char buf[MAXPATHL]; 3612 copy_option_part(&p, buf, ARRAY_SIZE(buf), ","); 3613 char allow_dirs; 3614 char *regpat = file_pat_to_reg_pat(buf, NULL, &allow_dirs, false); 3615 if (regpat == NULL) { 3616 break; 3617 } 3618 bool match = match_file_pat(regpat, NULL, ffname, sfname, tail, (int)allow_dirs); 3619 xfree(regpat); 3620 if (match) { 3621 return true; 3622 } 3623 } 3624 return false; 3625 } 3626 3627 /// Convert the given pattern "pat" which has shell style wildcards in it, into 3628 /// a regular expression, and return the result in allocated memory. If there 3629 /// is a directory path separator to be matched, then true is put in 3630 /// allow_dirs, otherwise false is put there -- webb. 3631 /// Handle backslashes before special characters, like "\*" and "\ ". 
///
/// @param pat         pattern with shell style wildcards
/// @param pat_end     first char after pattern or NULL
/// @param allow_dirs  Result passed back out in here
/// @param no_bslash   Don't use a backward slash as pathsep
///                    (only makes a difference when BACKSLASH_IN_FILENAME is defined)
///
/// @return NULL on failure.
char *file_pat_to_reg_pat(const char *pat, const char *pat_end, char *allow_dirs, int no_bslash)
  FUNC_ATTR_NONNULL_ARG(1)
{
  if (allow_dirs != NULL) {
    *allow_dirs = false;
  }

  if (pat_end == NULL) {
    pat_end = pat + strlen(pat);
  }

  // An empty pattern only matches an empty name.
  if (pat_end == pat) {
    return xstrdup("^$");
  }

  // First pass: compute an upper bound for the length of the result.
  size_t size = 2;  // '^' at start, '$' at end.

  for (const char *p = pat; p < pat_end; p++) {
    switch (*p) {
    case '*':
    case '.':
    case ',':
    case '{':
    case '}':
    case '~':
      size += 2;                // extra backslash
      break;
#ifdef BACKSLASH_IN_FILENAME
    case '\\':
    case '/':
      size += 4;                // could become "[\/]"
      break;
#endif
    default:
      size++;
      break;
    }
  }
  char *reg_pat = xmalloc(size + 1);

  size_t i = 0;  // Write index into reg_pat.

  // Leading '*'s: all but the last one are skipped and the result is not
  // anchored with '^'.
  if (pat[0] == '*') {
    while (pat[0] == '*' && pat < pat_end - 1) {
      pat++;
    }
  } else {
    reg_pat[i++] = '^';
  }
  // Trailing '*'s: they are left out of the translation and the result is not
  // anchored with '$'.
  const char *endp = pat_end - 1;
  bool add_dollar = true;
  if (endp >= pat && *endp == '*') {
    while (endp - pat > 0 && *endp == '*') {
      endp--;
    }
    add_dollar = false;
  }
  // Depth of "{" nesting; the loop stops when it becomes negative, which
  // happens on a '}' without a matching '{'.
  int nested = 0;
  for (const char *p = pat; *p && nested >= 0 && p <= endp; p++) {
    switch (*p) {
    case '*':
      reg_pat[i++] = '.';
      reg_pat[i++] = '*';
      while (p[1] == '*') {  // "**" matches like "*"
        p++;
      }
      break;
    case '.':
    case '~':
      reg_pat[i++] = '\\';
      reg_pat[i++] = *p;
      break;
    case '?':
      reg_pat[i++] = '.';
      break;
    case '\\':
      // A backslash at the very end of the string is dropped.
      if (p[1] == NUL) {
        break;
      }
#ifdef BACKSLASH_IN_FILENAME
      if (!no_bslash) {
        // translate:
        // "\x" to "\\x"  e.g., "dir\file"
        // "\*" to "\\.*" e.g., "dir\*.c"
        // "\?" to "\\."  e.g., "dir\??.c"
        // "\+" to "\+"   e.g., "fileX\+.c"
        if ((vim_isfilec((uint8_t)p[1]) || p[1] == '*' || p[1] == '?')
            && p[1] != '+') {
          reg_pat[i++] = '[';
          reg_pat[i++] = '\\';
          reg_pat[i++] = '/';
          reg_pat[i++] = ']';
          if (allow_dirs != NULL) {
            *allow_dirs = true;
          }
          break;
        }
      }
#endif
      // Undo escaping from ExpandEscape():
      // foo\?bar -> foo?bar
      // foo\%bar -> foo%bar
      // foo\,bar -> foo,bar
      // foo\ bar -> foo bar
      // Don't unescape \, * and others that are also special in a
      // regexp.
      // An escaped { must be unescaped since we use magic not
      // verymagic.  Use "\\\{n,m\}"" to get "\{n,m}".
      //
      // Note that "p" is advanced to the character after the backslash in the
      // first condition, all the tests below look at that character.
      if (*++p == '?' && (!BACKSLASH_IN_FILENAME_BOOL || no_bslash)) {
        reg_pat[i++] = '?';
      } else if (*p == ',' || *p == '%' || *p == '#'
                 || ascii_isspace(*p) || *p == '{' || *p == '}') {
        reg_pat[i++] = *p;
      } else if (*p == '\\' && p[1] == '\\' && p[2] == '{') {
        reg_pat[i++] = '\\';
        reg_pat[i++] = '{';
        p += 2;
      } else {
        if (allow_dirs != NULL && vim_ispathsep(*p)
            && (!BACKSLASH_IN_FILENAME_BOOL || (!no_bslash || *p != '\\'))) {
          *allow_dirs = true;
        }
        reg_pat[i++] = '\\';
        reg_pat[i++] = *p;
      }
      break;
#ifdef BACKSLASH_IN_FILENAME
    case '/':
      reg_pat[i++] = '[';
      reg_pat[i++] = '\\';
      reg_pat[i++] = '/';
      reg_pat[i++] = ']';
      if (allow_dirs != NULL) {
        *allow_dirs = true;
      }
      break;
#endif
    case '{':
      // "{a,b}" becomes the regexp group "\(a\|b\)".
      reg_pat[i++] = '\\';
      reg_pat[i++] = '(';
      nested++;
      break;
    case '}':
      reg_pat[i++] = '\\';
      reg_pat[i++] = ')';
      nested--;
      break;
    case ',':
      // Only an alternation separator inside "{}".
      if (nested) {
        reg_pat[i++] = '\\';
        reg_pat[i++] = '|';
      } else {
        reg_pat[i++] = ',';
      }
      break;
    default:
      if (allow_dirs != NULL && vim_ispathsep(*p)) {
        *allow_dirs = true;
      }
      reg_pat[i++] = *p;
      break;
    }
  }
  if (add_dollar) {
    reg_pat[i++] = '$';
  }
  reg_pat[i] = NUL;
  // Unbalanced braces: report the error, free the result and return NULL.
  if (nested != 0) {
    if (nested < 0) {
      emsg(_("E219: Missing {."));
    } else {
      emsg(_("E220: Missing }."));
    }
    XFREE_CLEAR(reg_pat);
  }
  return reg_pat;
}

#if defined(EINTR)

// Type of buffer size argument of read() and write() is platform-dependent.
# ifdef MSWIN
#  define BUFSIZE(x) (unsigned)(x)
# else
#  define BUFSIZE(x) (x)
# endif

/// Version of read() that retries when interrupted by EINTR (possibly
/// by a SIGWINCH).
///
/// @param fd       file descriptor to read from
/// @param buf      where the bytes are stored
/// @param bufsize  maximum number of bytes to read
///
/// @return the result of the last read() call: the number of bytes read, or a
///         negative value when it failed with an error other than EINTR.
ssize_t read_eintr(int fd, void *buf, size_t bufsize)
{
  ssize_t ret;

  while (true) {
    ret = read(fd, buf, BUFSIZE(bufsize));
    if (ret >= 0 || errno != EINTR) {
      break;
    }
  }
  return ret;
}

/// Version of write() that retries when interrupted by EINTR (possibly
/// by a SIGWINCH).
///
/// Keeps calling write() until all "bufsize" bytes were written.
///
/// @param fd       file descriptor to write to
/// @param buf      bytes to write
/// @param bufsize  number of bytes to write
///
/// @return the number of bytes written, which is less than "bufsize" when
///         write() failed with an error other than EINTR.
ssize_t write_eintr(int fd, void *buf, size_t bufsize)
{
  ssize_t ret = 0;  // Number of bytes written so far.

  // Repeat the write() so long it didn't fail, other than being interrupted
  // by a signal.
  while ((size_t)ret < bufsize) {
    ssize_t wlen = write(fd, (char *)buf + ret, BUFSIZE(bufsize - (size_t)ret));
    if (wlen < 0) {
      if (errno != EINTR) {
        break;
      }
    } else {
      ret += wlen;
    }
  }
  return ret;
}

# undef BUFSIZE

#endif