storagedir.c (17203B)
1 /* Copyright (c) 2017-2021, The Tor Project, Inc. */ 2 /* See LICENSE for licensing information */ 3 4 /** 5 * \file storagedir.c 6 * 7 * \brief An abstraction for a directory full of similar files. 8 * 9 * Storagedirs are used by our consensus cache code, and may someday also get 10 * used for unparseable objects. A large part of the need for this type is to 11 * work around the limitations in our sandbox code, where all filenames need 12 * to be registered in advance. 13 **/ 14 15 #include "lib/fs/storagedir.h" 16 17 #include "lib/container/smartlist.h" 18 #include "lib/encoding/confline.h" 19 #include "lib/fs/dir.h" 20 #include "lib/fs/files.h" 21 #include "lib/fs/mmap.h" 22 #include "lib/log/escape.h" 23 #include "lib/log/log.h" 24 #include "lib/log/util_bug.h" 25 #include "lib/malloc/malloc.h" 26 #include "lib/memarea/memarea.h" 27 #include "lib/sandbox/sandbox.h" 28 #include "lib/string/printf.h" 29 #include "lib/string/util_string.h" 30 31 #ifdef HAVE_SYS_TYPES_H 32 #include <sys/types.h> 33 #endif 34 #ifdef HAVE_SYS_STAT_H 35 #include <sys/stat.h> 36 #endif 37 #ifdef HAVE_UNISTD_H 38 #include <unistd.h> 39 #endif 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <string.h> 43 44 #define FNAME_MIN_NUM 1000 45 46 /** A storage_dir_t represents a directory full of similar cached 47 * files. Filenames are decimal integers. Files can be cleaned as needed 48 * to limit total disk usage. */ 49 struct storage_dir_t { 50 /** Directory holding the files for this storagedir. */ 51 char *directory; 52 /** Either NULL, or a directory listing of the directory (as a smartlist 53 * of strings */ 54 smartlist_t *contents; 55 /** The largest number of non-temporary files we'll place in the 56 * directory. */ 57 int max_files; 58 /** If true, then 'usage' has been computed. */ 59 int usage_known; 60 /** The total number of bytes used in this directory */ 61 uint64_t usage; 62 }; 63 64 /** Create or open a new storage directory at <b>dirname</b>, with 65 * capacity for up to <b>max_files</b> files. 66 */ 67 storage_dir_t * 68 storage_dir_new(const char *dirname, int max_files) 69 { 70 if (check_private_dir(dirname, CPD_CREATE, NULL) < 0) 71 return NULL; 72 73 storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t)); 74 d->directory = tor_strdup(dirname); 75 d->max_files = max_files; 76 return d; 77 } 78 79 /** 80 * Drop all in-RAM storage for <b>d</b>. Does not delete any files. 81 */ 82 void 83 storage_dir_free_(storage_dir_t *d) 84 { 85 if (d == NULL) 86 return; 87 tor_free(d->directory); 88 if (d->contents) { 89 SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp)); 90 smartlist_free(d->contents); 91 } 92 tor_free(d); 93 } 94 95 /** 96 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the 97 * operations that <b>d</b> will need. 98 * 99 * The presence of this function is why we need an upper limit on the 100 * number of files in a storage_dir_t: we need to approve file operations 101 * one by one. 102 */ 103 int 104 storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg) 105 { 106 int problems = 0; 107 int idx; 108 for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) { 109 char *path = NULL, *tmppath = NULL; 110 tor_asprintf(&path, "%s/%d", d->directory, idx); 111 tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx); 112 113 problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path)); 114 problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath)); 115 problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path)); 116 problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath)); 117 problems += sandbox_cfg_allow_rename(cfg, 118 tor_strdup(tmppath), tor_strdup(path)); 119 120 tor_free(path); 121 tor_free(tmppath); 122 } 123 124 return problems ? -1 : 0; 125 } 126 127 /** 128 * Remove all files in <b>d</b> whose names end with ".tmp". 129 * 130 * Requires that the contents field of <b>d</b> is set. 131 */ 132 static void 133 storage_dir_clean_tmpfiles(storage_dir_t *d) 134 { 135 if (!d->contents) 136 return; 137 SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) { 138 if (strcmpend(fname, ".tmp")) 139 continue; 140 char *path = NULL; 141 tor_asprintf(&path, "%s/%s", d->directory, fname); 142 if (unlink(sandbox_intern_string(path))) { 143 log_warn(LD_FS, "Unable to unlink %s while cleaning " 144 "temporary files: %s", escaped(path), strerror(errno)); 145 tor_free(path); 146 continue; 147 } 148 tor_free(path); 149 SMARTLIST_DEL_CURRENT(d->contents, fname); 150 tor_free(fname); 151 } SMARTLIST_FOREACH_END(fname); 152 153 d->usage_known = 0; 154 } 155 156 /** 157 * Re-scan the directory <b>d</b> to learn its contents. 158 */ 159 static int 160 storage_dir_rescan(storage_dir_t *d) 161 { 162 if (d->contents) { 163 SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp)); 164 smartlist_free(d->contents); 165 } 166 d->usage = 0; 167 d->usage_known = 0; 168 if (NULL == (d->contents = tor_listdir(d->directory))) { 169 return -1; 170 } 171 storage_dir_clean_tmpfiles(d); 172 return 0; 173 } 174 175 /** 176 * Return a smartlist containing the filenames within <b>d</b>. 177 */ 178 const smartlist_t * 179 storage_dir_list(storage_dir_t *d) 180 { 181 if (! d->contents) 182 storage_dir_rescan(d); 183 return d->contents; 184 } 185 186 /** 187 * Return the total number of bytes used for storage in <b>d</b>. 188 */ 189 uint64_t 190 storage_dir_get_usage(storage_dir_t *d) 191 { 192 if (d->usage_known) 193 return d->usage; 194 195 uint64_t total = 0; 196 SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) { 197 char *path = NULL; 198 struct stat st; 199 tor_asprintf(&path, "%s/%s", d->directory, cp); 200 if (stat(sandbox_intern_string(path), &st) == 0) { 201 total += st.st_size; 202 } 203 tor_free(path); 204 } SMARTLIST_FOREACH_END(cp); 205 206 d->usage = total; 207 d->usage_known = 1; 208 return d->usage; 209 } 210 211 /** Mmap a specified file within <b>d</b>. 212 * 213 * On failure, return NULL and set errno as for tor_mmap_file(). */ 214 tor_mmap_t * 215 storage_dir_map(storage_dir_t *d, const char *fname) 216 { 217 char *path = NULL; 218 tor_asprintf(&path, "%s/%s", d->directory, fname); 219 tor_mmap_t *result = tor_mmap_file(path); 220 int errval = errno; 221 tor_free(path); 222 if (result == NULL) 223 errno = errval; 224 return result; 225 } 226 227 /** Read a file within <b>d</b> into a newly allocated buffer. Set 228 * *<b>sz_out</b> to its size. */ 229 uint8_t * 230 storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out) 231 { 232 const int flags = bin ? RFTS_BIN : 0; 233 234 char *path = NULL; 235 tor_asprintf(&path, "%s/%s", d->directory, fname); 236 struct stat st; 237 char *contents = read_file_to_str(path, flags, &st); 238 if (contents && sz_out) { 239 // it fits in RAM, so we know its size is less than SIZE_MAX 240 #if UINT64_MAX > SIZE_MAX 241 tor_assert((uint64_t)st.st_size <= SIZE_MAX); 242 #endif 243 *sz_out = (size_t) st.st_size; 244 } 245 246 tor_free(path); 247 return (uint8_t *) contents; 248 } 249 250 /** Helper: Find an unused filename within the directory */ 251 static char * 252 find_unused_fname(storage_dir_t *d) 253 { 254 if (!d->contents) { 255 if (storage_dir_rescan(d) < 0) 256 return NULL; 257 } 258 259 char buf[16]; 260 int i; 261 /* Yuck; this is quadratic. Fortunately, that shouldn't matter much, 262 * since disk writes are more expensive by a lot. */ 263 for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) { 264 tor_snprintf(buf, sizeof(buf), "%d", i); 265 if (!smartlist_contains_string(d->contents, buf)) { 266 return tor_strdup(buf); 267 } 268 } 269 return NULL; 270 } 271 272 /** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of 273 * sized_chunk_t rather than a single byte array. */ 274 static int 275 storage_dir_save_chunks_to_file(storage_dir_t *d, 276 const smartlist_t *chunks, 277 int binary, 278 char **fname_out) 279 { 280 uint64_t total_length = 0; 281 char *fname = find_unused_fname(d); 282 if (!fname) 283 return -1; 284 285 SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch, 286 total_length += ch->len); 287 288 char *path = NULL; 289 tor_asprintf(&path, "%s/%s", d->directory, fname); 290 291 int r = write_chunks_to_file(path, chunks, binary, 0); 292 if (r == 0) { 293 if (d->usage_known) 294 d->usage += total_length; 295 if (fname_out) { 296 *fname_out = tor_strdup(fname); 297 } 298 if (d->contents) 299 smartlist_add(d->contents, tor_strdup(fname)); 300 } 301 tor_free(fname); 302 tor_free(path); 303 return r; 304 } 305 306 /** Try to write the <b>length</b> bytes at <b>data</b> into a new file 307 * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a 308 * newly allocated string containing the filename. On failure, return 309 * -1. */ 310 int 311 storage_dir_save_bytes_to_file(storage_dir_t *d, 312 const uint8_t *data, 313 size_t length, 314 int binary, 315 char **fname_out) 316 { 317 smartlist_t *chunks = smartlist_new(); 318 sized_chunk_t chunk = { (const char *)data, length }; 319 smartlist_add(chunks, &chunk); 320 int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out); 321 smartlist_free(chunks); 322 return r; 323 } 324 325 /** 326 * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string 327 * <b>str</b>. 328 */ 329 int 330 storage_dir_save_string_to_file(storage_dir_t *d, 331 const char *str, 332 int binary, 333 char **fname_out) 334 { 335 return storage_dir_save_bytes_to_file(d, 336 (const uint8_t*)str, strlen(str), binary, fname_out); 337 } 338 339 /** 340 * As storage_dir_save_bytes_to_file, but associates the data with the 341 * key-value pairs in <b>labels</b>. Files stored in this format can be 342 * recovered with storage_dir_map_labeled() or storage_dir_read_labeled(). 343 */ 344 int 345 storage_dir_save_labeled_to_file(storage_dir_t *d, 346 const config_line_t *labels, 347 const uint8_t *data, 348 size_t length, 349 char **fname_out) 350 { 351 /* 352 * The storage format is to prefix the data with the key-value pairs in 353 * <b>labels</b>, and a single NUL separator. But code outside this module 354 * MUST NOT rely on that format. 355 */ 356 357 smartlist_t *chunks = smartlist_new(); 358 memarea_t *area = memarea_new(); 359 const config_line_t *line; 360 for (line = labels; line; line = line->next) { 361 sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t)); 362 sz->len = strlen(line->key) + 1 + strlen(line->value) + 1; 363 const size_t allocated = sz->len + 1; 364 char *bytes = memarea_alloc(area, allocated); 365 tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value); 366 sz->bytes = bytes; 367 smartlist_add(chunks, sz); 368 } 369 370 sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t)); 371 nul->len = 1; 372 nul->bytes = "\0"; 373 smartlist_add(chunks, nul); 374 375 sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t)); 376 datachunk->bytes = (const char *)data; 377 datachunk->len = length; 378 smartlist_add(chunks, datachunk); 379 380 int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out); 381 smartlist_free(chunks); 382 memarea_drop_all(area); 383 return r; 384 } 385 386 /** 387 * Map a file that was created with storage_dir_save_labeled_to_file(). On 388 * failure, return NULL. On success, write a set of newly allocated labels 389 * into *<b>labels_out</b>, a pointer to the data into *<b>data_out</b>, and 390 * the data's size into *<b>sz_out</b>. On success, also return a tor_mmap_t 391 * object whose contents should not be used -- it needs to be kept around, 392 * though, for as long as <b>data_out</b> is going to be valid. 393 * 394 * On failure, set errno as for tor_mmap_file() if the file was missing or 395 * empty, and set errno to EINVAL if the file was not in the labeled 396 * format expected. 397 */ 398 tor_mmap_t * 399 storage_dir_map_labeled(storage_dir_t *dir, 400 const char *fname, 401 config_line_t **labels_out, 402 const uint8_t **data_out, 403 size_t *sz_out) 404 { 405 tor_mmap_t *m = storage_dir_map(dir, fname); 406 int errval; 407 if (! m) { 408 errval = errno; 409 goto err; 410 } 411 const char *nulp = memchr(m->data, '\0', m->size); 412 if (! nulp) { 413 errval = EINVAL; 414 goto err; 415 } 416 if (labels_out && config_get_lines(m->data, labels_out, 0) < 0) { 417 errval = EINVAL; 418 goto err; 419 } 420 size_t offset = nulp - m->data + 1; 421 tor_assert(offset <= m->size); 422 *data_out = (const uint8_t *)(m->data + offset); 423 *sz_out = m->size - offset; 424 425 return m; 426 err: 427 tor_munmap_file(m); 428 errno = errval; 429 return NULL; 430 } 431 432 /** As storage_dir_map_labeled, but return a new byte array containing the 433 * data. */ 434 uint8_t * 435 storage_dir_read_labeled(storage_dir_t *dir, 436 const char *fname, 437 config_line_t **labels_out, 438 size_t *sz_out) 439 { 440 const uint8_t *data = NULL; 441 tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out, 442 &data, sz_out); 443 if (m == NULL) 444 return NULL; 445 uint8_t *result = tor_memdup(data, *sz_out); 446 tor_munmap_file(m); 447 return result; 448 } 449 450 /* Reduce the cached usage amount in <b>d</b> by <b>removed_file_size</b>. 451 * This function is a no-op if <b>d->usage_known</b> is 0. */ 452 static void 453 storage_dir_reduce_usage(storage_dir_t *d, uint64_t removed_file_size) 454 { 455 if (d->usage_known) { 456 if (! BUG(d->usage < removed_file_size)) { 457 /* This bug can also be triggered if an external process resized a file 458 * between the call to storage_dir_get_usage() that last checked 459 * actual usage (rather than relaying on cached usage), and the call to 460 * this function. */ 461 d->usage -= removed_file_size; 462 } else { 463 /* If we underflowed the cached directory size, re-check the sizes of all 464 * the files in the directory. This makes storage_dir_shrink() quadratic, 465 * but only if a process is continually changing file sizes in the 466 * storage directory (in which case, we have bigger issues). 467 * 468 * We can't just reset usage_known, because storage_dir_shrink() relies 469 * on knowing the usage. */ 470 storage_dir_rescan(d); 471 (void)storage_dir_get_usage(d); 472 } 473 } 474 } 475 476 /** 477 * Remove the file called <b>fname</b> from <b>d</b>. 478 */ 479 void 480 storage_dir_remove_file(storage_dir_t *d, 481 const char *fname) 482 { 483 char *path = NULL; 484 tor_asprintf(&path, "%s/%s", d->directory, fname); 485 const char *ipath = sandbox_intern_string(path); 486 487 uint64_t size = 0; 488 if (d->usage_known) { 489 struct stat st; 490 if (stat(ipath, &st) == 0) { 491 size = st.st_size; 492 } 493 } 494 if (unlink(ipath) == 0) { 495 storage_dir_reduce_usage(d, size); 496 } else { 497 log_warn(LD_FS, "Unable to unlink %s while removing file: %s", 498 escaped(path), strerror(errno)); 499 tor_free(path); 500 return; 501 } 502 if (d->contents) { 503 smartlist_string_remove(d->contents, fname); 504 } 505 506 tor_free(path); 507 } 508 509 /** Helper type: used to sort the members of storage directory by mtime. */ 510 typedef struct shrinking_dir_entry_t { 511 time_t mtime; 512 uint64_t size; 513 char *path; 514 } shrinking_dir_entry_t; 515 516 /** Helper: use with qsort to sort shrinking_dir_entry_t structs. */ 517 static int 518 shrinking_dir_entry_compare(const void *a_, const void *b_) 519 { 520 const shrinking_dir_entry_t *a = a_; 521 const shrinking_dir_entry_t *b = b_; 522 523 if (a->mtime < b->mtime) 524 return -1; 525 else if (a->mtime > b->mtime) 526 return 1; 527 else 528 return 0; 529 } 530 531 /** 532 * Try to free space by removing the oldest files in <b>d</b>. Delete 533 * until no more than <b>target_size</b> bytes are left, and at least 534 * <b>min_to_remove</b> files have been removed... or until there is 535 * nothing left to remove. 536 * 537 * Return 0 on success; -1 on failure. 538 */ 539 int 540 storage_dir_shrink(storage_dir_t *d, 541 uint64_t target_size, 542 int min_to_remove) 543 { 544 if (d->usage_known && d->usage <= target_size && !min_to_remove) { 545 /* Already small enough. */ 546 return 0; 547 } 548 549 if (storage_dir_rescan(d) < 0) 550 return -1; 551 552 const uint64_t orig_usage = storage_dir_get_usage(d); 553 if (orig_usage <= target_size && !min_to_remove) { 554 /* Okay, small enough after rescan! */ 555 return 0; 556 } 557 558 const int n = smartlist_len(d->contents); 559 shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t)); 560 SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) { 561 shrinking_dir_entry_t *ent = &ents[fname_sl_idx]; 562 struct stat st; 563 tor_asprintf(&ent->path, "%s/%s", d->directory, fname); 564 if (stat(sandbox_intern_string(ent->path), &st) == 0) { 565 ent->mtime = st.st_mtime; 566 ent->size = st.st_size; 567 } 568 } SMARTLIST_FOREACH_END(fname); 569 570 qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare); 571 572 int idx = 0; 573 while ((d->usage > target_size || min_to_remove > 0) && idx < n) { 574 if (unlink(sandbox_intern_string(ents[idx].path)) == 0) { 575 storage_dir_reduce_usage(d, ents[idx].size); 576 --min_to_remove; 577 } 578 ++idx; 579 } 580 581 for (idx = 0; idx < n; ++idx) { 582 tor_free(ents[idx].path); 583 } 584 tor_free(ents); 585 586 storage_dir_rescan(d); 587 588 return 0; 589 } 590 591 /** Remove all files in <b>d</b>. */ 592 int 593 storage_dir_remove_all(storage_dir_t *d) 594 { 595 return storage_dir_shrink(d, 0, d->max_files); 596 } 597 598 /** 599 * Return the largest number of non-temporary files we're willing to 600 * store in <b>d</b>. 601 */ 602 int 603 storage_dir_get_max_files(storage_dir_t *d) 604 { 605 return d->max_files; 606 }