tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

storagedir.c (17203B)


      1 /* Copyright (c) 2017-2021, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 /**
      5 * \file storagedir.c
      6 *
      7 * \brief An abstraction for a directory full of similar files.
      8 *
      9 * Storagedirs are used by our consensus cache code, and may someday also get
     10 * used for unparseable objects. A large part of the need for this type is to
     11 * work around the limitations in our sandbox code, where all filenames need
     12 * to be registered in advance.
     13 **/
     14 
     15 #include "lib/fs/storagedir.h"
     16 
     17 #include "lib/container/smartlist.h"
     18 #include "lib/encoding/confline.h"
     19 #include "lib/fs/dir.h"
     20 #include "lib/fs/files.h"
     21 #include "lib/fs/mmap.h"
     22 #include "lib/log/escape.h"
     23 #include "lib/log/log.h"
     24 #include "lib/log/util_bug.h"
     25 #include "lib/malloc/malloc.h"
     26 #include "lib/memarea/memarea.h"
     27 #include "lib/sandbox/sandbox.h"
     28 #include "lib/string/printf.h"
     29 #include "lib/string/util_string.h"
     30 
     31 #ifdef HAVE_SYS_TYPES_H
     32 #include <sys/types.h>
     33 #endif
     34 #ifdef HAVE_SYS_STAT_H
     35 #include <sys/stat.h>
     36 #endif
     37 #ifdef HAVE_UNISTD_H
     38 #include <unistd.h>
     39 #endif
     40 #include <stdlib.h>
     41 #include <errno.h>
     42 #include <string.h>
     43 
     44 #define FNAME_MIN_NUM 1000
     45 
     46 /** A storage_dir_t represents a directory full of similar cached
     47 * files. Filenames are decimal integers. Files can be cleaned as needed
     48 * to limit total disk usage. */
     49 struct storage_dir_t {
     50  /** Directory holding the files for this storagedir. */
     51  char *directory;
     52  /** Either NULL, or a directory listing of the directory (as a smartlist
     53   * of strings */
     54  smartlist_t *contents;
     55  /** The largest number of non-temporary files we'll place in the
     56   * directory. */
     57  int max_files;
     58  /** If true, then 'usage' has been computed. */
     59  int usage_known;
     60  /** The total number of bytes used in this directory */
     61  uint64_t usage;
     62 };
     63 
     64 /** Create or open a new storage directory at <b>dirname</b>, with
     65 * capacity for up to <b>max_files</b> files.
     66 */
     67 storage_dir_t *
     68 storage_dir_new(const char *dirname, int max_files)
     69 {
     70  if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
     71    return NULL;
     72 
     73  storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
     74  d->directory = tor_strdup(dirname);
     75  d->max_files = max_files;
     76  return d;
     77 }
     78 
     79 /**
     80 * Drop all in-RAM storage for <b>d</b>.  Does not delete any files.
     81 */
     82 void
     83 storage_dir_free_(storage_dir_t *d)
     84 {
     85  if (d == NULL)
     86    return;
     87  tor_free(d->directory);
     88  if (d->contents) {
     89    SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
     90    smartlist_free(d->contents);
     91  }
     92  tor_free(d);
     93 }
     94 
     95 /**
     96 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
     97 * operations that <b>d</b> will need.
     98 *
     99 * The presence of this function is why we need an upper limit on the
    100 * number of files in a storage_dir_t: we need to approve file operations
    101 * one by one.
    102 */
    103 int
    104 storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
    105 {
    106  int problems = 0;
    107  int idx;
    108  for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
    109    char *path = NULL, *tmppath = NULL;
    110    tor_asprintf(&path, "%s/%d", d->directory, idx);
    111    tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
    112 
    113    problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
    114    problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
    115    problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
    116    problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
    117    problems += sandbox_cfg_allow_rename(cfg,
    118                                      tor_strdup(tmppath), tor_strdup(path));
    119 
    120    tor_free(path);
    121    tor_free(tmppath);
    122  }
    123 
    124  return problems ? -1 : 0;
    125 }
    126 
    127 /**
    128 * Remove all files in <b>d</b> whose names end with ".tmp".
    129 *
    130 * Requires that the contents field of <b>d</b> is set.
    131 */
    132 static void
    133 storage_dir_clean_tmpfiles(storage_dir_t *d)
    134 {
    135  if (!d->contents)
    136    return;
    137  SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
    138    if (strcmpend(fname, ".tmp"))
    139      continue;
    140    char *path = NULL;
    141    tor_asprintf(&path, "%s/%s", d->directory, fname);
    142    if (unlink(sandbox_intern_string(path))) {
    143      log_warn(LD_FS, "Unable to unlink %s while cleaning "
    144               "temporary files: %s", escaped(path), strerror(errno));
    145      tor_free(path);
    146      continue;
    147    }
    148    tor_free(path);
    149    SMARTLIST_DEL_CURRENT(d->contents, fname);
    150    tor_free(fname);
    151  } SMARTLIST_FOREACH_END(fname);
    152 
    153  d->usage_known = 0;
    154 }
    155 
    156 /**
    157 * Re-scan the directory <b>d</b> to learn its contents.
    158 */
    159 static int
    160 storage_dir_rescan(storage_dir_t *d)
    161 {
    162  if (d->contents) {
    163    SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
    164    smartlist_free(d->contents);
    165  }
    166  d->usage = 0;
    167  d->usage_known = 0;
    168  if (NULL == (d->contents = tor_listdir(d->directory))) {
    169    return -1;
    170  }
    171  storage_dir_clean_tmpfiles(d);
    172  return 0;
    173 }
    174 
    175 /**
    176 * Return a smartlist containing the filenames within <b>d</b>.
    177 */
    178 const smartlist_t *
    179 storage_dir_list(storage_dir_t *d)
    180 {
    181  if (! d->contents)
    182    storage_dir_rescan(d);
    183  return d->contents;
    184 }
    185 
    186 /**
    187 * Return the total number of bytes used for storage in <b>d</b>.
    188 */
    189 uint64_t
    190 storage_dir_get_usage(storage_dir_t *d)
    191 {
    192  if (d->usage_known)
    193    return d->usage;
    194 
    195  uint64_t total = 0;
    196  SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
    197    char *path = NULL;
    198    struct stat st;
    199    tor_asprintf(&path, "%s/%s", d->directory, cp);
    200    if (stat(sandbox_intern_string(path), &st) == 0) {
    201      total += st.st_size;
    202    }
    203    tor_free(path);
    204  } SMARTLIST_FOREACH_END(cp);
    205 
    206  d->usage = total;
    207  d->usage_known = 1;
    208  return d->usage;
    209 }
    210 
    211 /** Mmap a specified file within <b>d</b>.
    212 *
    213 * On failure, return NULL and set errno as for tor_mmap_file(). */
    214 tor_mmap_t *
    215 storage_dir_map(storage_dir_t *d, const char *fname)
    216 {
    217  char *path = NULL;
    218  tor_asprintf(&path, "%s/%s", d->directory, fname);
    219  tor_mmap_t *result = tor_mmap_file(path);
    220  int errval = errno;
    221  tor_free(path);
    222  if (result == NULL)
    223    errno = errval;
    224  return result;
    225 }
    226 
    227 /** Read a file within <b>d</b> into a newly allocated buffer.  Set
    228 * *<b>sz_out</b> to its size. */
    229 uint8_t *
    230 storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
    231 {
    232  const int flags = bin ? RFTS_BIN : 0;
    233 
    234  char *path = NULL;
    235  tor_asprintf(&path, "%s/%s", d->directory, fname);
    236  struct stat st;
    237  char *contents = read_file_to_str(path, flags, &st);
    238  if (contents && sz_out) {
    239    // it fits in RAM, so we know its size is less than SIZE_MAX
    240 #if UINT64_MAX > SIZE_MAX
    241    tor_assert((uint64_t)st.st_size <= SIZE_MAX);
    242 #endif
    243    *sz_out = (size_t) st.st_size;
    244  }
    245 
    246  tor_free(path);
    247  return (uint8_t *) contents;
    248 }
    249 
    250 /** Helper: Find an unused filename within the directory */
    251 static char *
    252 find_unused_fname(storage_dir_t *d)
    253 {
    254  if (!d->contents) {
    255    if (storage_dir_rescan(d) < 0)
    256      return NULL;
    257  }
    258 
    259  char buf[16];
    260  int i;
    261  /* Yuck; this is quadratic.  Fortunately, that shouldn't matter much,
    262   * since disk writes are more expensive by a lot. */
    263  for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
    264    tor_snprintf(buf, sizeof(buf), "%d", i);
    265    if (!smartlist_contains_string(d->contents, buf)) {
    266      return tor_strdup(buf);
    267    }
    268  }
    269  return NULL;
    270 }
    271 
    272 /** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of
    273 * sized_chunk_t rather than a single byte array. */
    274 static int
    275 storage_dir_save_chunks_to_file(storage_dir_t *d,
    276                                const smartlist_t *chunks,
    277                                int binary,
    278                                char **fname_out)
    279 {
    280  uint64_t total_length = 0;
    281  char *fname = find_unused_fname(d);
    282  if (!fname)
    283    return -1;
    284 
    285  SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch,
    286                    total_length += ch->len);
    287 
    288  char *path = NULL;
    289  tor_asprintf(&path, "%s/%s", d->directory, fname);
    290 
    291  int r = write_chunks_to_file(path, chunks, binary, 0);
    292  if (r == 0) {
    293    if (d->usage_known)
    294      d->usage += total_length;
    295    if (fname_out) {
    296      *fname_out = tor_strdup(fname);
    297    }
    298    if (d->contents)
    299      smartlist_add(d->contents, tor_strdup(fname));
    300  }
    301  tor_free(fname);
    302  tor_free(path);
    303  return r;
    304 }
    305 
    306 /** Try to write the <b>length</b> bytes at <b>data</b> into a new file
    307 * in <b>d</b>.  On success, return 0 and set *<b>fname_out</b> to a
    308 * newly allocated string containing the filename.  On failure, return
    309 * -1. */
    310 int
    311 storage_dir_save_bytes_to_file(storage_dir_t *d,
    312                               const uint8_t *data,
    313                               size_t length,
    314                               int binary,
    315                               char **fname_out)
    316 {
    317  smartlist_t *chunks = smartlist_new();
    318  sized_chunk_t chunk = { (const char *)data, length };
    319  smartlist_add(chunks, &chunk);
    320  int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out);
    321  smartlist_free(chunks);
    322  return r;
    323 }
    324 
    325 /**
    326 * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
    327 * <b>str</b>.
    328 */
    329 int
    330 storage_dir_save_string_to_file(storage_dir_t *d,
    331                                const char *str,
    332                                int binary,
    333                                char **fname_out)
    334 {
    335  return storage_dir_save_bytes_to_file(d,
    336                (const uint8_t*)str, strlen(str), binary, fname_out);
    337 }
    338 
    339 /**
    340 * As storage_dir_save_bytes_to_file, but associates the data with the
    341 * key-value pairs in <b>labels</b>. Files stored in this format can be
    342 * recovered with storage_dir_map_labeled() or storage_dir_read_labeled().
    343 */
    344 int
    345 storage_dir_save_labeled_to_file(storage_dir_t *d,
    346                                  const config_line_t *labels,
    347                                  const uint8_t *data,
    348                                  size_t length,
    349                                  char **fname_out)
    350 {
    351  /*
    352   * The storage format is to prefix the data with the key-value pairs in
    353   * <b>labels</b>, and a single NUL separator.  But code outside this module
    354   * MUST NOT rely on that format.
    355   */
    356 
    357  smartlist_t *chunks = smartlist_new();
    358  memarea_t *area = memarea_new();
    359  const config_line_t *line;
    360  for (line = labels; line; line = line->next) {
    361    sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t));
    362    sz->len = strlen(line->key) + 1 + strlen(line->value) + 1;
    363    const size_t allocated = sz->len + 1;
    364    char *bytes = memarea_alloc(area, allocated);
    365    tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value);
    366    sz->bytes = bytes;
    367    smartlist_add(chunks, sz);
    368  }
    369 
    370  sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t));
    371  nul->len = 1;
    372  nul->bytes = "\0";
    373  smartlist_add(chunks, nul);
    374 
    375  sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t));
    376  datachunk->bytes = (const char *)data;
    377  datachunk->len = length;
    378  smartlist_add(chunks, datachunk);
    379 
    380  int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out);
    381  smartlist_free(chunks);
    382  memarea_drop_all(area);
    383  return r;
    384 }
    385 
    386 /**
    387 * Map a file that was created with storage_dir_save_labeled_to_file().  On
    388 * failure, return NULL.  On success, write a set of newly allocated labels
    389 * into *<b>labels_out</b>, a pointer to the data into *<b>data_out</b>, and
    390 * the data's size into *<b>sz_out</b>. On success, also return a tor_mmap_t
    391 * object whose contents should not be used -- it needs to be kept around,
    392 * though, for as long as <b>data_out</b> is going to be valid.
    393 *
    394 * On failure, set errno as for tor_mmap_file() if the file was missing or
    395 * empty, and set errno to EINVAL if the file was not in the labeled
    396 * format expected.
    397 */
    398 tor_mmap_t *
    399 storage_dir_map_labeled(storage_dir_t *dir,
    400                         const char *fname,
    401                         config_line_t **labels_out,
    402                         const uint8_t **data_out,
    403                         size_t *sz_out)
    404 {
    405  tor_mmap_t *m = storage_dir_map(dir, fname);
    406  int errval;
    407  if (! m) {
    408    errval = errno;
    409    goto err;
    410  }
    411  const char *nulp = memchr(m->data, '\0', m->size);
    412  if (! nulp) {
    413    errval = EINVAL;
    414    goto err;
    415  }
    416  if (labels_out && config_get_lines(m->data, labels_out, 0) < 0) {
    417    errval = EINVAL;
    418    goto err;
    419  }
    420  size_t offset = nulp - m->data + 1;
    421  tor_assert(offset <= m->size);
    422  *data_out = (const uint8_t *)(m->data + offset);
    423  *sz_out = m->size - offset;
    424 
    425  return m;
    426 err:
    427  tor_munmap_file(m);
    428  errno = errval;
    429  return NULL;
    430 }
    431 
    432 /** As storage_dir_map_labeled, but return a new byte array containing the
    433 * data. */
    434 uint8_t *
    435 storage_dir_read_labeled(storage_dir_t *dir,
    436                          const char *fname,
    437                          config_line_t **labels_out,
    438                          size_t *sz_out)
    439 {
    440  const uint8_t *data = NULL;
    441  tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out,
    442                                           &data, sz_out);
    443  if (m == NULL)
    444    return NULL;
    445  uint8_t *result = tor_memdup(data, *sz_out);
    446  tor_munmap_file(m);
    447  return result;
    448 }
    449 
    450 /* Reduce the cached usage amount in <b>d</b> by <b>removed_file_size</b>.
    451 * This function is a no-op if <b>d->usage_known</b> is 0. */
    452 static void
    453 storage_dir_reduce_usage(storage_dir_t *d, uint64_t removed_file_size)
    454 {
    455  if (d->usage_known) {
    456    if (! BUG(d->usage < removed_file_size)) {
    457      /* This bug can also be triggered if an external process resized a file
    458       * between the call to storage_dir_get_usage() that last checked
    459       * actual usage (rather than relaying on cached usage), and the call to
    460       * this function. */
    461      d->usage -= removed_file_size;
    462    } else {
    463      /* If we underflowed the cached directory size, re-check the sizes of all
    464       * the files in the directory. This makes storage_dir_shrink() quadratic,
    465       * but only if a process is continually changing file sizes in the
    466       * storage directory (in which case, we have bigger issues).
    467       *
    468       * We can't just reset usage_known, because storage_dir_shrink() relies
    469       * on knowing the usage. */
    470      storage_dir_rescan(d);
    471      (void)storage_dir_get_usage(d);
    472    }
    473  }
    474 }
    475 
    476 /**
    477 * Remove the file called <b>fname</b> from <b>d</b>.
    478 */
    479 void
    480 storage_dir_remove_file(storage_dir_t *d,
    481                        const char *fname)
    482 {
    483  char *path = NULL;
    484  tor_asprintf(&path, "%s/%s", d->directory, fname);
    485  const char *ipath = sandbox_intern_string(path);
    486 
    487  uint64_t size = 0;
    488  if (d->usage_known) {
    489    struct stat st;
    490    if (stat(ipath, &st) == 0) {
    491      size = st.st_size;
    492    }
    493  }
    494  if (unlink(ipath) == 0) {
    495    storage_dir_reduce_usage(d, size);
    496  } else {
    497    log_warn(LD_FS, "Unable to unlink %s while removing file: %s",
    498             escaped(path), strerror(errno));
    499    tor_free(path);
    500    return;
    501  }
    502  if (d->contents) {
    503    smartlist_string_remove(d->contents, fname);
    504  }
    505 
    506  tor_free(path);
    507 }
    508 
    509 /** Helper type: used to sort the members of storage directory by mtime. */
    510 typedef struct shrinking_dir_entry_t {
    511  time_t mtime;
    512  uint64_t size;
    513  char *path;
    514 } shrinking_dir_entry_t;
    515 
    516 /** Helper: use with qsort to sort shrinking_dir_entry_t structs. */
    517 static int
    518 shrinking_dir_entry_compare(const void *a_, const void *b_)
    519 {
    520  const shrinking_dir_entry_t *a = a_;
    521  const shrinking_dir_entry_t *b = b_;
    522 
    523  if (a->mtime < b->mtime)
    524    return -1;
    525  else if (a->mtime > b->mtime)
    526    return 1;
    527  else
    528    return 0;
    529 }
    530 
    531 /**
    532 * Try to free space by removing the oldest files in <b>d</b>. Delete
    533 * until no more than <b>target_size</b> bytes are left, and at least
    534 * <b>min_to_remove</b> files have been removed... or until there is
    535 * nothing left to remove.
    536 *
    537 * Return 0 on success; -1 on failure.
    538 */
    539 int
    540 storage_dir_shrink(storage_dir_t *d,
    541                   uint64_t target_size,
    542                   int min_to_remove)
    543 {
    544  if (d->usage_known && d->usage <= target_size && !min_to_remove) {
    545    /* Already small enough. */
    546    return 0;
    547  }
    548 
    549  if (storage_dir_rescan(d) < 0)
    550    return -1;
    551 
    552  const uint64_t orig_usage = storage_dir_get_usage(d);
    553  if (orig_usage <= target_size && !min_to_remove) {
    554    /* Okay, small enough after rescan! */
    555    return 0;
    556  }
    557 
    558  const int n = smartlist_len(d->contents);
    559  shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
    560  SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
    561    shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
    562    struct stat st;
    563    tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
    564    if (stat(sandbox_intern_string(ent->path), &st) == 0) {
    565      ent->mtime = st.st_mtime;
    566      ent->size = st.st_size;
    567    }
    568  } SMARTLIST_FOREACH_END(fname);
    569 
    570  qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
    571 
    572  int idx = 0;
    573  while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
    574    if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
    575      storage_dir_reduce_usage(d, ents[idx].size);
    576      --min_to_remove;
    577    }
    578    ++idx;
    579  }
    580 
    581  for (idx = 0; idx < n; ++idx) {
    582    tor_free(ents[idx].path);
    583  }
    584  tor_free(ents);
    585 
    586  storage_dir_rescan(d);
    587 
    588  return 0;
    589 }
    590 
    591 /** Remove all files in <b>d</b>. */
    592 int
    593 storage_dir_remove_all(storage_dir_t *d)
    594 {
    595  return storage_dir_shrink(d, 0, d->max_files);
    596 }
    597 
    598 /**
    599 * Return the largest number of non-temporary files we're willing to
    600 * store in <b>d</b>.
    601 */
    602 int
    603 storage_dir_get_max_files(storage_dir_t *d)
    604 {
    605  return d->max_files;
    606 }