conscache.c (18573B)
1 /* Copyright (c) 2017-2021, The Tor Project, Inc. */ 2 /* See LICENSE for licensing information */ 3 4 /** 5 * @file conscache.c 6 * @brief Consensus and diff on-disk cache. 7 **/ 8 9 #include "core/or/or.h" 10 11 #include "app/config/config.h" 12 #include "feature/dircache/conscache.h" 13 #include "lib/crypt_ops/crypto_util.h" 14 #include "lib/fs/storagedir.h" 15 #include "lib/encoding/confline.h" 16 17 #define CCE_MAGIC 0x17162253 18 19 #ifdef _WIN32 20 /* On Windows, unlink won't work on a file if the file is actively mmap()ed. 21 * That forces us to be less aggressive about unlinking files, and causes other 22 * changes throughout our logic. 23 */ 24 #define MUST_UNMAP_TO_UNLINK 25 #endif /* defined(_WIN32) */ 26 27 /** 28 * A consensus_cache_entry_t is a reference-counted handle to an 29 * item in a consensus_cache_t. It can be mmapped into RAM, or not, 30 * depending whether it's currently in use. 31 */ 32 struct consensus_cache_entry_t { 33 uint32_t magic; /**< Must be set to CCE_MAGIC */ 34 HANDLE_ENTRY(consensus_cache_entry, consensus_cache_entry_t); 35 int32_t refcnt; /**< Reference count. */ 36 unsigned can_remove : 1; /**< If true, we want to delete this file. */ 37 /** If true, we intend to unmap this file as soon as we're done with it. */ 38 unsigned release_aggressively : 1; 39 40 /** Filename for this object within the storage_dir_t */ 41 char *fname; 42 /** Labels associated with this object. Immutable once the object 43 * is created. */ 44 config_line_t *labels; 45 /** Pointer to the cache that includes this entry (if any). */ 46 consensus_cache_t *in_cache; 47 48 /** Since what time has this object been mapped into RAM, but with the cache 49 * being the only having a reference to it? */ 50 time_t unused_since; 51 /** mmaped contents of the underlying file. May be NULL */ 52 tor_mmap_t *map; 53 /** Length of the body within <b>map</b>. */ 54 size_t bodylen; 55 /** Pointer to the body within <b>map</b>. */ 56 const uint8_t *body; 57 }; 58 59 /** 60 * A consensus_cache_t holds a directory full of labeled items. 61 */ 62 struct consensus_cache_t { 63 /** Underling storage_dir_t to handle persistence */ 64 storage_dir_t *dir; 65 /** List of all the entries in the directory. */ 66 smartlist_t *entries; 67 68 /** The maximum number of entries that we'd like to allow in this cache. 69 * This is the same as the storagedir limit when MUST_UNMAP_TO_UNLINK is 70 * not defined. */ 71 unsigned max_entries; 72 }; 73 74 static void consensus_cache_clear(consensus_cache_t *cache); 75 static void consensus_cache_rescan(consensus_cache_t *); 76 static void consensus_cache_entry_map(consensus_cache_t *, 77 consensus_cache_entry_t *); 78 static void consensus_cache_entry_unmap(consensus_cache_entry_t *ent); 79 80 /** 81 * Helper: Open a consensus cache in subdirectory <b>subdir</b> of the 82 * data directory, to hold up to <b>max_entries</b> of data. 83 */ 84 consensus_cache_t * 85 consensus_cache_open(const char *subdir, int max_entries) 86 { 87 int storagedir_max_entries; 88 consensus_cache_t *cache = tor_malloc_zero(sizeof(consensus_cache_t)); 89 char *directory = get_cachedir_fname(subdir); 90 cache->max_entries = max_entries; 91 92 #ifdef MUST_UNMAP_TO_UNLINK 93 /* If we can't unlink the files that we're still using, then we need to 94 * tell the storagedir backend to allow far more files than this consensus 95 * cache actually wants, so that it can hold files which, from this cache's 96 * perspective, have become useless. 97 */ 98 #define VERY_LARGE_STORAGEDIR_LIMIT (1000*1000) 99 storagedir_max_entries = VERY_LARGE_STORAGEDIR_LIMIT; 100 #else /* !defined(MUST_UNMAP_TO_UNLINK) */ 101 /* Otherwise, we can just tell the storagedir to use the same limits 102 * as this cache. */ 103 storagedir_max_entries = max_entries; 104 #endif /* defined(MUST_UNMAP_TO_UNLINK) */ 105 106 cache->dir = storage_dir_new(directory, storagedir_max_entries); 107 tor_free(directory); 108 if (!cache->dir) { 109 tor_free(cache); 110 return NULL; 111 } 112 113 consensus_cache_rescan(cache); 114 return cache; 115 } 116 117 /** Return true if it's okay to put more entries in this cache than 118 * its official file limit. 119 * 120 * (We need this method on Windows, where we can't unlink files that are still 121 * in use, and therefore might need to temporarily exceed the file limit until 122 * the no-longer-wanted files are deletable.) 123 */ 124 int 125 consensus_cache_may_overallocate(consensus_cache_t *cache) 126 { 127 (void) cache; 128 #ifdef MUST_UNMAP_TO_UNLINK 129 return 1; 130 #else 131 return 0; 132 #endif 133 } 134 135 // HACK: GCC on Appveyor hates that we may assert before returning. Work around 136 // the error. 137 #ifdef _WIN32 138 #ifndef COCCI 139 #pragma GCC diagnostic push 140 #pragma GCC diagnostic ignored "-Wsuggest-attribute=noreturn" 141 #endif 142 #endif /* defined(_WIN32) */ 143 144 /** 145 * Tell the sandbox (if any) configured by <b>cfg</b> to allow the 146 * operations that <b>cache</b> will need. 147 */ 148 int 149 consensus_cache_register_with_sandbox(consensus_cache_t *cache, 150 struct sandbox_cfg_elem_t **cfg) 151 { 152 #ifdef MUST_UNMAP_TO_UNLINK 153 /* Our Linux sandbox doesn't support huge file lists like the one that would 154 * be generated by using VERY_LARGE_STORAGEDIR_LIMIT above in 155 * consensus_cache_open(). Since the Linux sandbox is the only one we have 156 * right now, we just assert that we never reach this point when we've had 157 * to use VERY_LARGE_STORAGEDIR_LIMIT. 158 * 159 * If at some point in the future we have a different sandbox mechanism that 160 * can handle huge file lists, we can remove this assertion or make it 161 * conditional. 162 */ 163 tor_assert_nonfatal_unreached(); 164 #endif /* defined(MUST_UNMAP_TO_UNLINK) */ 165 return storage_dir_register_with_sandbox(cache->dir, cfg); 166 } 167 168 #ifdef _WIN32 169 #ifndef COCCI 170 #pragma GCC diagnostic pop 171 #endif 172 #endif 173 174 /** 175 * Helper: clear all entries from <b>cache</b> (but do not delete 176 * any that aren't marked for removal 177 */ 178 static void 179 consensus_cache_clear(consensus_cache_t *cache) 180 { 181 consensus_cache_delete_pending(cache, 0); 182 183 SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { 184 ent->in_cache = NULL; 185 consensus_cache_entry_decref(ent); 186 } SMARTLIST_FOREACH_END(ent); 187 smartlist_free(cache->entries); 188 cache->entries = NULL; 189 } 190 191 /** 192 * Drop all storage held by <b>cache</b>. 193 */ 194 void 195 consensus_cache_free_(consensus_cache_t *cache) 196 { 197 if (! cache) 198 return; 199 200 if (cache->entries) { 201 consensus_cache_clear(cache); 202 } 203 storage_dir_free(cache->dir); 204 tor_free(cache); 205 } 206 207 /** 208 * Write <b>datalen</b> bytes of data at <b>data</b> into the <b>cache</b>, 209 * labeling that data with <b>labels</b>. On failure, return NULL. On 210 * success, return a newly created consensus_cache_entry_t. 211 * 212 * The returned value will be owned by the cache, and you will have a 213 * reference to it. Call consensus_cache_entry_decref() when you are 214 * done with it. 215 * 216 * The provided <b>labels</b> MUST have distinct keys: if they don't, 217 * this API does not specify which values (if any) for the duplicate keys 218 * will be considered. 219 */ 220 consensus_cache_entry_t * 221 consensus_cache_add(consensus_cache_t *cache, 222 const config_line_t *labels, 223 const uint8_t *data, 224 size_t datalen) 225 { 226 char *fname = NULL; 227 int r = storage_dir_save_labeled_to_file(cache->dir, 228 labels, data, datalen, &fname); 229 if (r < 0 || fname == NULL) { 230 return NULL; 231 } 232 consensus_cache_entry_t *ent = 233 tor_malloc_zero(sizeof(consensus_cache_entry_t)); 234 ent->magic = CCE_MAGIC; 235 ent->fname = fname; 236 ent->labels = config_lines_dup(labels); 237 ent->in_cache = cache; 238 ent->unused_since = TIME_MAX; 239 smartlist_add(cache->entries, ent); 240 /* Start the reference count at 2: the caller owns one copy, and the 241 * cache owns another. 242 */ 243 ent->refcnt = 2; 244 245 return ent; 246 } 247 248 /** 249 * Given a <b>cache</b>, return some entry for which <b>key</b>=<b>value</b>. 250 * Return NULL if no such entry exists. 251 * 252 * Does not adjust reference counts. 253 */ 254 consensus_cache_entry_t * 255 consensus_cache_find_first(consensus_cache_t *cache, 256 const char *key, 257 const char *value) 258 { 259 smartlist_t *tmp = smartlist_new(); 260 consensus_cache_find_all(tmp, cache, key, value); 261 consensus_cache_entry_t *ent = NULL; 262 if (smartlist_len(tmp)) 263 ent = smartlist_get(tmp, 0); 264 smartlist_free(tmp); 265 return ent; 266 } 267 268 /** 269 * Given a <b>cache</b>, add every entry to <b>out</b> for which 270 * <b>key</b>=<b>value</b>. If <b>key</b> is NULL, add every entry. 271 * 272 * Do not add any entry that has been marked for removal. 273 * 274 * Does not adjust reference counts. 275 */ 276 void 277 consensus_cache_find_all(smartlist_t *out, 278 consensus_cache_t *cache, 279 const char *key, 280 const char *value) 281 { 282 SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { 283 if (ent->can_remove == 1) { 284 /* We want to delete this; pretend it isn't there. */ 285 continue; 286 } 287 if (! key) { 288 smartlist_add(out, ent); 289 continue; 290 } 291 const char *found_val = consensus_cache_entry_get_value(ent, key); 292 if (found_val && !strcmp(value, found_val)) { 293 smartlist_add(out, ent); 294 } 295 } SMARTLIST_FOREACH_END(ent); 296 } 297 298 /** 299 * Given a list of consensus_cache_entry_t, remove all those entries 300 * that do not have <b>key</b>=<b>value</b> in their labels. 301 * 302 * Does not adjust reference counts. 303 */ 304 void 305 consensus_cache_filter_list(smartlist_t *lst, 306 const char *key, 307 const char *value) 308 { 309 if (BUG(lst == NULL)) 310 return; // LCOV_EXCL_LINE 311 if (key == NULL) 312 return; 313 SMARTLIST_FOREACH_BEGIN(lst, consensus_cache_entry_t *, ent) { 314 const char *found_val = consensus_cache_entry_get_value(ent, key); 315 if (! found_val || strcmp(value, found_val)) { 316 SMARTLIST_DEL_CURRENT(lst, ent); 317 } 318 } SMARTLIST_FOREACH_END(ent); 319 } 320 321 /** 322 * If <b>ent</b> has a label with the given <b>key</b>, return its 323 * value. Otherwise return NULL. 324 * 325 * The return value is only guaranteed to be valid for as long as you 326 * hold a reference to <b>ent</b>. 327 */ 328 const char * 329 consensus_cache_entry_get_value(const consensus_cache_entry_t *ent, 330 const char *key) 331 { 332 const config_line_t *match = config_line_find(ent->labels, key); 333 if (match) 334 return match->value; 335 else 336 return NULL; 337 } 338 339 /** 340 * Return a pointer to the labels in <b>ent</b>. 341 * 342 * This pointer is only guaranteed to be valid for as long as you 343 * hold a reference to <b>ent</b>. 344 */ 345 const config_line_t * 346 consensus_cache_entry_get_labels(const consensus_cache_entry_t *ent) 347 { 348 return ent->labels; 349 } 350 351 /** 352 * Increase the reference count of <b>ent</b>. 353 */ 354 void 355 consensus_cache_entry_incref(consensus_cache_entry_t *ent) 356 { 357 if (BUG(ent->magic != CCE_MAGIC)) 358 return; // LCOV_EXCL_LINE 359 ++ent->refcnt; 360 ent->unused_since = TIME_MAX; 361 } 362 363 /** 364 * Release a reference held to <b>ent</b>. 365 * 366 * If it was the last reference, ent will be freed. Therefore, you must not 367 * use <b>ent</b> after calling this function. 368 */ 369 void 370 consensus_cache_entry_decref(consensus_cache_entry_t *ent) 371 { 372 if (! ent) 373 return; 374 if (BUG(ent->refcnt <= 0)) 375 return; // LCOV_EXCL_LINE 376 if (BUG(ent->magic != CCE_MAGIC)) 377 return; // LCOV_EXCL_LINE 378 379 --ent->refcnt; 380 381 if (ent->refcnt == 1 && ent->in_cache) { 382 /* Only the cache has a reference: we don't need to keep the file 383 * mapped */ 384 if (ent->map) { 385 if (ent->release_aggressively) { 386 consensus_cache_entry_unmap(ent); 387 } else { 388 ent->unused_since = approx_time(); 389 } 390 } 391 return; 392 } 393 394 if (ent->refcnt > 0) 395 return; 396 397 /* Refcount is zero; we can free it. */ 398 if (ent->map) { 399 consensus_cache_entry_unmap(ent); 400 } 401 tor_free(ent->fname); 402 config_free_lines(ent->labels); 403 consensus_cache_entry_handles_clear(ent); 404 memwipe(ent, 0, sizeof(consensus_cache_entry_t)); 405 tor_free(ent); 406 } 407 408 /** 409 * Mark <b>ent</b> for deletion from the cache. Deletion will not occur 410 * until the cache is the only place that holds a reference to <b>ent</b>. 411 */ 412 void 413 consensus_cache_entry_mark_for_removal(consensus_cache_entry_t *ent) 414 { 415 ent->can_remove = 1; 416 } 417 418 /** 419 * Mark <b>ent</b> as the kind of entry that we don't need to keep mmap'd for 420 * any longer than we're actually using it. 421 */ 422 void 423 consensus_cache_entry_mark_for_aggressive_release(consensus_cache_entry_t *ent) 424 { 425 ent->release_aggressively = 1; 426 } 427 428 /** 429 * Try to read the body of <b>ent</b> into memory if it isn't already 430 * loaded. On success, set *<b>body_out</b> to the body, *<b>sz_out</b> 431 * to its size, and return 0. On failure return -1. 432 * 433 * The resulting body pointer will only be valid for as long as you 434 * hold a reference to <b>ent</b>. 435 */ 436 int 437 consensus_cache_entry_get_body(const consensus_cache_entry_t *ent, 438 const uint8_t **body_out, 439 size_t *sz_out) 440 { 441 if (BUG(ent->magic != CCE_MAGIC)) 442 return -1; // LCOV_EXCL_LINE 443 444 if (! ent->map) { 445 if (! ent->in_cache) 446 return -1; 447 448 consensus_cache_entry_map((consensus_cache_t *)ent->in_cache, 449 (consensus_cache_entry_t *)ent); 450 if (! ent->map) { 451 return -1; 452 } 453 } 454 455 *body_out = ent->body; 456 *sz_out = ent->bodylen; 457 return 0; 458 } 459 460 /** 461 * Unmap every mmap'd element of <b>cache</b> that has been unused 462 * since <b>cutoff</b>. 463 */ 464 void 465 consensus_cache_unmap_lazy(consensus_cache_t *cache, time_t cutoff) 466 { 467 SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { 468 tor_assert_nonfatal(ent->in_cache == cache); 469 if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { 470 /* Somebody is using this entry right now */ 471 continue; 472 } 473 if (ent->unused_since > cutoff) { 474 /* Has been unused only for a little while */ 475 continue; 476 } 477 if (ent->map == NULL) { 478 /* Not actually mapped. */ 479 continue; 480 } 481 consensus_cache_entry_unmap(ent); 482 } SMARTLIST_FOREACH_END(ent); 483 } 484 485 /** 486 * Return the number of currently unused filenames available in this cache. 487 */ 488 int 489 consensus_cache_get_n_filenames_available(consensus_cache_t *cache) 490 { 491 tor_assert(cache); 492 int max = cache->max_entries; 493 int used = smartlist_len(storage_dir_list(cache->dir)); 494 #ifdef MUST_UNMAP_TO_UNLINK 495 if (used > max) 496 return 0; 497 #else 498 tor_assert_nonfatal(max >= used); 499 #endif /* defined(MUST_UNMAP_TO_UNLINK) */ 500 return max - used; 501 } 502 503 /** 504 * Delete every element of <b>cache</b> has been marked with 505 * consensus_cache_entry_mark_for_removal. If <b>force</b> is false, 506 * retain those entries which are in use by something other than the cache. 507 */ 508 void 509 consensus_cache_delete_pending(consensus_cache_t *cache, int force) 510 { 511 SMARTLIST_FOREACH_BEGIN(cache->entries, consensus_cache_entry_t *, ent) { 512 tor_assert_nonfatal(ent->in_cache == cache); 513 int force_ent = force; 514 #ifdef MUST_UNMAP_TO_UNLINK 515 /* We cannot delete anything with an active mmap on win32, so no 516 * force-deletion. */ 517 if (ent->map) { 518 force_ent = 0; 519 } 520 #endif /* defined(MUST_UNMAP_TO_UNLINK) */ 521 if (! force_ent) { 522 if (ent->refcnt > 1 || BUG(ent->in_cache == NULL)) { 523 /* Somebody is using this entry right now */ 524 continue; 525 } 526 } 527 if (ent->can_remove == 0) { 528 /* Don't want to delete this. */ 529 continue; 530 } 531 if (BUG(ent->refcnt <= 0)) { 532 continue; // LCOV_EXCL_LINE 533 } 534 535 SMARTLIST_DEL_CURRENT(cache->entries, ent); 536 ent->in_cache = NULL; 537 char *fname = tor_strdup(ent->fname); /* save a copy */ 538 consensus_cache_entry_decref(ent); 539 storage_dir_remove_file(cache->dir, fname); 540 tor_free(fname); 541 } SMARTLIST_FOREACH_END(ent); 542 } 543 544 /** 545 * Internal helper: rescan <b>cache</b> and rebuild its list of entries. 546 */ 547 static void 548 consensus_cache_rescan(consensus_cache_t *cache) 549 { 550 if (cache->entries) { 551 consensus_cache_clear(cache); 552 } 553 554 cache->entries = smartlist_new(); 555 const smartlist_t *fnames = storage_dir_list(cache->dir); 556 SMARTLIST_FOREACH_BEGIN(fnames, const char *, fname) { 557 tor_mmap_t *map = NULL; 558 config_line_t *labels = NULL; 559 const uint8_t *body; 560 size_t bodylen; 561 map = storage_dir_map_labeled(cache->dir, fname, 562 &labels, &body, &bodylen); 563 if (! map) { 564 /* The ERANGE error might come from tor_mmap_file() -- it means the file 565 * was empty. EINVAL might come from ..map_labeled() -- it means the 566 * file was misformatted. In both cases, we should just delete it. 567 */ 568 if (errno == ERANGE || errno == EINVAL) { 569 log_warn(LD_FS, "Found %s file %s in consensus cache; removing it.", 570 errno == ERANGE ? "empty" : "misformatted", 571 escaped(fname)); 572 storage_dir_remove_file(cache->dir, fname); 573 } else { 574 /* Can't load this; continue */ 575 log_warn(LD_FS, "Unable to map file %s from consensus cache: %s", 576 escaped(fname), strerror(errno)); 577 } 578 continue; 579 } 580 consensus_cache_entry_t *ent = 581 tor_malloc_zero(sizeof(consensus_cache_entry_t)); 582 ent->magic = CCE_MAGIC; 583 ent->fname = tor_strdup(fname); 584 ent->labels = labels; 585 ent->refcnt = 1; 586 ent->in_cache = cache; 587 ent->unused_since = TIME_MAX; 588 smartlist_add(cache->entries, ent); 589 tor_munmap_file(map); /* don't actually need to keep this around */ 590 } SMARTLIST_FOREACH_END(fname); 591 } 592 593 /** 594 * Make sure that <b>ent</b> is mapped into RAM. 595 */ 596 static void 597 consensus_cache_entry_map(consensus_cache_t *cache, 598 consensus_cache_entry_t *ent) 599 { 600 if (ent->map) 601 return; 602 603 ent->map = storage_dir_map_labeled(cache->dir, ent->fname, 604 NULL, &ent->body, &ent->bodylen); 605 ent->unused_since = TIME_MAX; 606 } 607 608 /** 609 * Unmap <b>ent</b> from RAM. 610 * 611 * Do not call this if something other than the cache is holding a reference 612 * to <b>ent</b> 613 */ 614 static void 615 consensus_cache_entry_unmap(consensus_cache_entry_t *ent) 616 { 617 ent->unused_since = TIME_MAX; 618 if (!ent->map) 619 return; 620 621 tor_munmap_file(ent->map); 622 ent->map = NULL; 623 ent->body = NULL; 624 ent->bodylen = 0; 625 ent->unused_since = TIME_MAX; 626 } 627 628 HANDLE_IMPL(consensus_cache_entry, consensus_cache_entry_t, ) 629 630 #ifdef TOR_UNIT_TESTS 631 /** 632 * Testing only: Return true iff <b>ent</b> is mapped into memory. 633 * 634 * (In normal operation, this information is not exposed.) 635 */ 636 int 637 consensus_cache_entry_is_mapped(consensus_cache_entry_t *ent) 638 { 639 if (ent->map) { 640 tor_assert(ent->body); 641 return 1; 642 } else { 643 tor_assert(!ent->body); 644 return 0; 645 } 646 } 647 #endif /* defined(TOR_UNIT_TESTS) */