tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

conscache.c (18573B)


      1 /* Copyright (c) 2017-2021, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 /**
      5 * @file conscache.c
      6 * @brief Consensus and diff on-disk cache.
      7 **/
      8 
      9 #include "core/or/or.h"
     10 
     11 #include "app/config/config.h"
     12 #include "feature/dircache/conscache.h"
     13 #include "lib/crypt_ops/crypto_util.h"
     14 #include "lib/fs/storagedir.h"
     15 #include "lib/encoding/confline.h"
     16 
     17 #define CCE_MAGIC 0x17162253
     18 
     19 #ifdef _WIN32
     20 /* On Windows, unlink won't work on a file if the file is actively mmap()ed.
     21 * That forces us to be less aggressive about unlinking files, and causes other
     22 * changes throughout our logic.
     23 */
     24 #define MUST_UNMAP_TO_UNLINK
     25 #endif /* defined(_WIN32) */
     26 
/**
 * A consensus_cache_entry_t is a reference-counted handle to an
 * item in a consensus_cache_t.  It can be mmapped into RAM, or not,
 * depending whether it's currently in use.
 */
struct consensus_cache_entry_t {
 uint32_t magic; /**< Must be set to CCE_MAGIC */
 HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t);
 /** Reference count.  While the entry sits in a cache's entry list, the
  * cache itself accounts for one of these references. */
 int32_t refcnt;
 /** If true, we want to delete this file. */
 unsigned can_remove : 1;
 /** If true, we intend to unmap this file as soon as we're done with it. */
 unsigned release_aggressively : 1;

 /** Filename for this object within the storage_dir_t */
 char *fname;
 /** Labels associated with this object. Immutable once the object
  * is created. */
 config_line_t *labels;
 /** Pointer to the cache that includes this entry (if any). */
 consensus_cache_t *in_cache;

 /** The time since which this object has been mapped into RAM while the
  * cache was the only holder of a reference to it.  Set to TIME_MAX when
  * that is not the case. */
 time_t unused_since;
 /** mmaped contents of the underlying file.  May be NULL */
 tor_mmap_t *map;
 /** Length of the body within <b>map</b>. */
 size_t bodylen;
 /** Pointer to the body within <b>map</b>. */
 const uint8_t *body;
};
     58 
/**
 * A consensus_cache_t holds a directory full of labeled items.
 */
struct consensus_cache_t {
 /** Underlying storage_dir_t to handle persistence */
 storage_dir_t *dir;
 /** List of all the entries in the directory (consensus_cache_entry_t
  * pointers).  NULL until the first rescan and after the cache has been
  * cleared. */
 smartlist_t *entries;

 /** The maximum number of entries that we'd like to allow in this cache.
  * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is
  * not defined. */
 unsigned max_entries;
};
     73 
     74 static void consensus_cache_clear(consensus_cache_t *cache);
     75 static void consensus_cache_rescan(consensus_cache_t *);
     76 static void consensus_cache_entry_map(consensus_cache_t *,
     77                                      consensus_cache_entry_t *);
     78 static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent);
     79 
     80 /**
     81 * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the
     82 * data directory, to hold up to <b>max_entries</b> of data.
     83 */
     84 consensus_cache_t *
     85 consensus_cache_open(const char *subdir, int max_entries)
     86 {
     87  int storagedir_max_entries;
     88  consensus_cache_t *cache = tor_malloc_zero(sizeof(consensus_cache_t));
     89  char *directory = get_cachedir_fname(subdir);
     90  cache->max_entries = max_entries;
     91 
     92 #ifdef MUST_UNMAP_TO_UNLINK
     93  /* If we can't unlink the files that we're still using, then we need to
     94   * tell the storagedir backend to allow far more files than this consensus
     95   * cache actually wants, so that it can hold files which, from this cache's
     96   * perspective, have become useless.
     97   */
     98 #define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000)
     99  storagedir_max_entries = VERY_LARGE_STORAGEDIR_LIMIT;
    100 #else /* !defined(MUST_UNMAP_TO_UNLINK) */
    101  /* Otherwise, we can just tell the storagedir to use the same limits
    102   * as this cache. */
    103  storagedir_max_entries = max_entries;
    104 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
    105 
    106  cache->dir = storage_dir_new(directory, storagedir_max_entries);
    107  tor_free(directory);
    108  if (!cache->dir) {
    109    tor_free(cache);
    110    return NULL;
    111  }
    112 
    113  consensus_cache_rescan(cache);
    114  return cache;
    115 }
    116 
    117 /** Return true if it's okay to put more entries in this cache than
    118 * its official file limit.
    119 *
    120 * (We need this method on Windows, where we can't unlink files that are still
    121 * in use, and therefore might need to temporarily exceed the file limit until
    122 * the no-longer-wanted files are deletable.)
    123 */
    124 int
    125 consensus_cache_may_overallocate(consensus_cache_t *cache)
    126 {
    127  (void) cache;
    128 #ifdef MUST_UNMAP_TO_UNLINK
    129  return 1;
    130 #else
    131  return 0;
    132 #endif
    133 }
    134 
// HACK: GCC on Appveyor hates that we may assert before returning. Work around
// the error.  The diagnostic state pushed here is restored by the matching
// "pop" that follows the function below.
#ifdef _WIN32
#ifndef COCCI
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn"
#endif
#endif /* defined(_WIN32) */

/**
 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
 * operations that <b>cache</b> will need.
 *
 * Return whatever storage_dir_register_with_sandbox() returns for the
 * cache's underlying storage directory.  When MUST_UNMAP_TO_UNLINK is
 * defined, a non-fatal assertion is triggered first, since this function
 * is not expected to be reached in that configuration.
 */
int
consensus_cache_register_with_sandbox(consensus_cache_t *cache,
                                     struct sandbox_cfg_elem_t **cfg)
{
#ifdef MUST_UNMAP_TO_UNLINK
 /* Our Linux sandbox doesn't support huge file lists like the one that would
  * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in
  * consensus_cache_open().  Since the Linux sandbox is the only one we have
  * right now, we just assert that we never reach this point when we've had
  * to use VERY_LARGE_STORAGEDIR_LIMIT.
  *
  * If at some point in the future we have a different sandbox mechanism that
  * can handle huge file lists, we can remove this assertion or make it
  * conditional.
  */
 tor_assert_nonfatal_unreached();
#endif /* defined(MUST_UNMAP_TO_UNLINK) */
 return storage_dir_register_with_sandbox(cache->dir, cfg);
}

/* Restore the diagnostic state saved before the function above. */
#ifdef _WIN32
#ifndef COCCI
#pragma GCC diagnostic pop
#endif
#endif
    173 
    174 /**
    175 * Helper: clear all entries from <b>cache</b> (but do not delete
    176 * any that aren't marked for removal
    177 */
    178 static void
    179 consensus_cache_clear(consensus_cache_t *cache)
    180 {
    181  consensus_cache_delete_pending(cache, 0);
    182 
    183  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    184    ent->in_cache = NULL;
    185    consensus_cache_entry_decref(ent);
    186  } SMARTLIST_FOREACH_END(ent);
    187  smartlist_free(cache->entries);
    188  cache->entries = NULL;
    189 }
    190 
    191 /**
    192 * Drop all storage held by <b>cache</b>.
    193 */
    194 void
    195 consensus_cache_free_(consensus_cache_t *cache)
    196 {
    197  if (! cache)
    198    return;
    199 
    200  if (cache->entries) {
    201    consensus_cache_clear(cache);
    202  }
    203  storage_dir_free(cache->dir);
    204  tor_free(cache);
    205 }
    206 
    207 /**
    208 * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>,
    209 * labeling that data with <b>labels</b>.  On failure, return NULL. On
    210 * success, return a newly created consensus_cache_entry_t.
    211 *
    212 * The returned value will be owned by the cache, and you will have a
    213 * reference to it.  Call consensus_cache_entry_decref() when you are
    214 * done with it.
    215 *
    216 * The provided <b>labels</b> MUST have distinct keys: if they don't,
    217 * this API does not specify which values (if any) for the duplicate keys
    218 * will be considered.
    219 */
    220 consensus_cache_entry_t *
    221 consensus_cache_add(consensus_cache_t *cache,
    222                    const config_line_t *labels,
    223                    const uint8_t *data,
    224                    size_t datalen)
    225 {
    226  char *fname = NULL;
    227  int r = storage_dir_save_labeled_to_file(cache->dir,
    228                                            labels, data, datalen, &fname);
    229  if (r < 0 || fname == NULL) {
    230    return NULL;
    231  }
    232  consensus_cache_entry_t *ent =
    233    tor_malloc_zero(sizeof(consensus_cache_entry_t));
    234  ent->magic = CCE_MAGIC;
    235  ent->fname = fname;
    236  ent->labels = config_lines_dup(labels);
    237  ent->in_cache = cache;
    238  ent->unused_since = TIME_MAX;
    239  smartlist_add(cache->entries, ent);
    240  /* Start the reference count at 2: the caller owns one copy, and the
    241   * cache owns another.
    242   */
    243  ent->refcnt = 2;
    244 
    245  return ent;
    246 }
    247 
    248 /**
    249 * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>.
    250 * Return NULL if no such entry exists.
    251 *
    252 * Does not adjust reference counts.
    253 */
    254 consensus_cache_entry_t *
    255 consensus_cache_find_first(consensus_cache_t *cache,
    256                           const char *key,
    257                           const char *value)
    258 {
    259  smartlist_t *tmp = smartlist_new();
    260  consensus_cache_find_all(tmp, cache, key, value);
    261  consensus_cache_entry_t *ent = NULL;
    262  if (smartlist_len(tmp))
    263    ent = smartlist_get(tmp, 0);
    264  smartlist_free(tmp);
    265  return ent;
    266 }
    267 
    268 /**
    269 * Given a <b>cache</b>, add every entry to <b>out</b> for which
    270 * <b>key</b>=<b>value</b>.  If <b>key</b> is NULL, add every entry.
    271 *
    272 * Do not add any entry that has been marked for removal.
    273 *
    274 * Does not adjust reference counts.
    275 */
    276 void
    277 consensus_cache_find_all(smartlist_t *out,
    278                         consensus_cache_t *cache,
    279                         const char *key,
    280                         const char *value)
    281 {
    282  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    283    if (ent->can_remove == 1) {
    284      /* We want to delete this; pretend it isn't there. */
    285      continue;
    286    }
    287    if (! key) {
    288      smartlist_add(out, ent);
    289      continue;
    290    }
    291    const char *found_val = consensus_cache_entry_get_value(ent, key);
    292    if (found_val && !strcmp(value, found_val)) {
    293      smartlist_add(out, ent);
    294    }
    295  } SMARTLIST_FOREACH_END(ent);
    296 }
    297 
    298 /**
    299 * Given a list of consensus_cache_entry_t, remove all those entries
    300 * that do not have <b>key</b>=<b>value</b> in their labels.
    301 *
    302 * Does not adjust reference counts.
    303 */
    304 void
    305 consensus_cache_filter_list(smartlist_t *lst,
    306                            const char *key,
    307                            const char *value)
    308 {
    309  if (BUG(lst == NULL))
    310    return; // LCOV_EXCL_LINE
    311  if (key == NULL)
    312    return;
    313  SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, ent) {
    314    const char *found_val = consensus_cache_entry_get_value(ent, key);
    315    if (! found_val || strcmp(value, found_val)) {
    316      SMARTLIST_DEL_CURRENT(lst, ent);
    317    }
    318  } SMARTLIST_FOREACH_END(ent);
    319 }
    320 
    321 /**
    322 * If <b>ent</b> has a label with the given <b>key</b>, return its
    323 * value.  Otherwise return NULL.
    324 *
    325 * The return value is only guaranteed to be valid for as long as you
    326 * hold a reference to <b>ent</b>.
    327 */
    328 const char *
    329 consensus_cache_entry_get_value(const consensus_cache_entry_t *ent,
    330                                const char *key)
    331 {
    332  const config_line_t *match = config_line_find(ent->labels, key);
    333  if (match)
    334    return match->value;
    335  else
    336    return NULL;
    337 }
    338 
    339 /**
    340 * Return a pointer to the labels in <b>ent</b>.
    341 *
    342 * This pointer is only guaranteed to be valid for as long as you
    343 * hold a reference to <b>ent</b>.
    344 */
    345 const config_line_t *
    346 consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent)
    347 {
    348  return ent->labels;
    349 }
    350 
    351 /**
    352 * Increase the reference count of <b>ent</b>.
    353 */
    354 void
    355 consensus_cache_entry_incref(consensus_cache_entry_t *ent)
    356 {
    357  if (BUG(ent->magic != CCE_MAGIC))
    358    return; // LCOV_EXCL_LINE
    359  ++ent->refcnt;
    360  ent->unused_since = TIME_MAX;
    361 }
    362 
    363 /**
    364 * Release a reference held to <b>ent</b>.
    365 *
    366 * If it was the last reference, ent will be freed. Therefore, you must not
    367 * use <b>ent</b> after calling this function.
    368 */
    369 void
    370 consensus_cache_entry_decref(consensus_cache_entry_t *ent)
    371 {
    372  if (! ent)
    373    return;
    374  if (BUG(ent->refcnt <= 0))
    375    return; // LCOV_EXCL_LINE
    376  if (BUG(ent->magic != CCE_MAGIC))
    377    return; // LCOV_EXCL_LINE
    378 
    379  --ent->refcnt;
    380 
    381  if (ent->refcnt == 1 && ent->in_cache) {
    382    /* Only the cache has a reference: we don't need to keep the file
    383     * mapped */
    384    if (ent->map) {
    385      if (ent->release_aggressively) {
    386        consensus_cache_entry_unmap(ent);
    387      } else {
    388        ent->unused_since = approx_time();
    389      }
    390    }
    391    return;
    392  }
    393 
    394  if (ent->refcnt > 0)
    395    return;
    396 
    397  /* Refcount is zero; we can free it. */
    398  if (ent->map) {
    399    consensus_cache_entry_unmap(ent);
    400  }
    401  tor_free(ent->fname);
    402  config_free_lines(ent->labels);
    403  consensus_cache_entry_handles_clear(ent);
    404  memwipe(ent, 0, sizeof(consensus_cache_entry_t));
    405  tor_free(ent);
    406 }
    407 
    408 /**
    409 * Mark <b>ent</b> for deletion from the cache.  Deletion will not occur
    410 * until the cache is the only place that holds a reference to <b>ent</b>.
    411 */
    412 void
    413 consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent)
    414 {
    415  ent->can_remove = 1;
    416 }
    417 
    418 /**
    419 * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for
    420 * any longer than we're actually using it.
    421 */
    422 void
    423 consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent)
    424 {
    425  ent->release_aggressively = 1;
    426 }
    427 
    428 /**
    429 * Try to read the body of <b>ent</b> into memory if it isn't already
    430 * loaded.  On success, set *<b>body_out</b> to the body, *<b>sz_out</b>
    431 * to its size, and return 0.  On failure return -1.
    432 *
    433 * The resulting body pointer will only be valid for as long as you
    434 * hold a reference to <b>ent</b>.
    435 */
    436 int
    437 consensus_cache_entry_get_body(const consensus_cache_entry_t *ent,
    438                               const uint8_t **body_out,
    439                               size_t *sz_out)
    440 {
    441  if (BUG(ent->magic != CCE_MAGIC))
    442    return -1; // LCOV_EXCL_LINE
    443 
    444  if (! ent->map) {
    445    if (! ent->in_cache)
    446      return -1;
    447 
    448    consensus_cache_entry_map((consensus_cache_t *)ent->in_cache,
    449                              (consensus_cache_entry_t *)ent);
    450    if (! ent->map) {
    451      return -1;
    452    }
    453  }
    454 
    455  *body_out = ent->body;
    456  *sz_out = ent->bodylen;
    457  return 0;
    458 }
    459 
    460 /**
    461 * Unmap every mmap'd element of <b>cache</b> that has been unused
    462 * since <b>cutoff</b>.
    463 */
    464 void
    465 consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff)
    466 {
    467  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    468    tor_assert_nonfatal(ent->in_cache == cache);
    469    if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) {
    470      /* Somebody is using this entry right now */
    471      continue;
    472    }
    473    if (ent->unused_since > cutoff) {
    474      /* Has been unused only for a little while */
    475      continue;
    476    }
    477    if (ent->map == NULL) {
    478      /* Not actually mapped. */
    479      continue;
    480    }
    481    consensus_cache_entry_unmap(ent);
    482  } SMARTLIST_FOREACH_END(ent);
    483 }
    484 
    485 /**
    486 * Return the number of currently unused filenames available in this cache.
    487 */
    488 int
    489 consensus_cache_get_n_filenames_available(consensus_cache_t *cache)
    490 {
    491  tor_assert(cache);
    492  int max = cache->max_entries;
    493  int used = smartlist_len(storage_dir_list(cache->dir));
    494 #ifdef MUST_UNMAP_TO_UNLINK
    495  if (used > max)
    496    return 0;
    497 #else
    498  tor_assert_nonfatal(max >= used);
    499 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
    500  return max - used;
    501 }
    502 
    503 /**
    504 * Delete every element of <b>cache</b> has been marked with
    505 * consensus_cache_entry_mark_for_removal. If <b>force</b> is false,
    506 * retain those entries which are in use by something other than the cache.
    507 */
    508 void
    509 consensus_cache_delete_pending(consensus_cache_t *cache, int force)
    510 {
    511  SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) {
    512    tor_assert_nonfatal(ent->in_cache == cache);
    513    int force_ent = force;
    514 #ifdef MUST_UNMAP_TO_UNLINK
    515    /* We cannot delete anything with an active mmap on win32, so no
    516     * force-deletion. */
    517    if (ent->map) {
    518      force_ent = 0;
    519    }
    520 #endif /* defined(MUST_UNMAP_TO_UNLINK) */
    521    if (! force_ent) {
    522      if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) {
    523        /* Somebody is using this entry right now */
    524        continue;
    525      }
    526    }
    527    if (ent->can_remove == 0) {
    528      /* Don't want to delete this. */
    529      continue;
    530    }
    531    if (BUG(ent->refcnt <= 0)) {
    532      continue; // LCOV_EXCL_LINE
    533    }
    534 
    535    SMARTLIST_DEL_CURRENT(cache->entries, ent);
    536    ent->in_cache = NULL;
    537    char *fname = tor_strdup(ent->fname); /* save a copy */
    538    consensus_cache_entry_decref(ent);
    539    storage_dir_remove_file(cache->dir, fname);
    540    tor_free(fname);
    541  } SMARTLIST_FOREACH_END(ent);
    542 }
    543 
    544 /**
    545 * Internal helper: rescan <b>cache</b> and rebuild its list of entries.
    546 */
    547 static void
    548 consensus_cache_rescan(consensus_cache_t *cache)
    549 {
    550  if (cache->entries) {
    551    consensus_cache_clear(cache);
    552  }
    553 
    554  cache->entries = smartlist_new();
    555  const smartlist_t *fnames = storage_dir_list(cache->dir);
    556  SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) {
    557    tor_mmap_t *map = NULL;
    558    config_line_t *labels = NULL;
    559    const uint8_t *body;
    560    size_t bodylen;
    561    map = storage_dir_map_labeled(cache->dir, fname,
    562                                  &labels, &body, &bodylen);
    563    if (! map) {
    564      /* The ERANGE error might come from tor_mmap_file() -- it means the file
    565       * was empty. EINVAL might come from ..map_labeled() -- it means the
    566       * file was misformatted. In both cases, we should just delete it.
    567       */
    568      if (errno == ERANGE || errno == EINVAL) {
    569        log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.",
    570                 errno == ERANGE ? "empty" : "misformatted",
    571                 escaped(fname));
    572        storage_dir_remove_file(cache->dir, fname);
    573      } else {
    574        /* Can't load this; continue */
    575        log_warn(LD_FS, "Unable to map file %s from consensus cache: %s",
    576                 escaped(fname), strerror(errno));
    577      }
    578      continue;
    579    }
    580    consensus_cache_entry_t *ent =
    581      tor_malloc_zero(sizeof(consensus_cache_entry_t));
    582    ent->magic = CCE_MAGIC;
    583    ent->fname = tor_strdup(fname);
    584    ent->labels = labels;
    585    ent->refcnt = 1;
    586    ent->in_cache = cache;
    587    ent->unused_since = TIME_MAX;
    588    smartlist_add(cache->entries, ent);
    589    tor_munmap_file(map); /* don't actually need to keep this around */
    590  } SMARTLIST_FOREACH_END(fname);
    591 }
    592 
    593 /**
    594 * Make sure that <b>ent</b> is mapped into RAM.
    595 */
    596 static void
    597 consensus_cache_entry_map(consensus_cache_t *cache,
    598                          consensus_cache_entry_t *ent)
    599 {
    600  if (ent->map)
    601    return;
    602 
    603  ent->map = storage_dir_map_labeled(cache->dir, ent->fname,
    604                                     NULL, &ent->body, &ent->bodylen);
    605  ent->unused_since = TIME_MAX;
    606 }
    607 
    608 /**
    609 * Unmap <b>ent</b> from RAM.
    610 *
    611 * Do not call this if something other than the cache is holding a reference
    612 * to <b>ent</b>
    613 */
    614 static void
    615 consensus_cache_entry_unmap(consensus_cache_entry_t *ent)
    616 {
    617  ent->unused_since = TIME_MAX;
    618  if (!ent->map)
    619    return;
    620 
    621  tor_munmap_file(ent->map);
    622  ent->map = NULL;
    623  ent->body = NULL;
    624  ent->bodylen = 0;
    625  ent->unused_since = TIME_MAX;
    626 }
    627 
/* Expand the handle implementation for consensus_cache_entry_t; this pairs
 * with the HANDLE_ENTRY() field declared inside the struct.
 * NOTE(review): presumably this is what defines
 * consensus_cache_entry_handles_clear(), which the decref path calls --
 * confirm against the handles macro header. */
HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, )
    629 
    630 #ifdef TOR_UNIT_TESTS
    631 /**
    632 * Testing only: Return true iff <b>ent</b> is mapped into memory.
    633 *
    634 * (In normal operation, this information is not exposed.)
    635 */
    636 int
    637 consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent)
    638 {
    639  if (ent->map) {
    640    tor_assert(ent->body);
    641    return 1;
    642  } else {
    643    tor_assert(!ent->body);
    644    return 0;
    645  }
    646 }
    647 #endif /* defined(TOR_UNIT_TESTS) */