unparseable.c (19244B)
1 /* Copyright (c) 2001 Matej Pfajfar. 2 * Copyright (c) 2001-2004, Roger Dingledine. 3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. 4 * Copyright (c) 2007-2021, The Tor Project, Inc. */ 5 /* See LICENSE for licensing information */ 6 7 /** 8 * @file unparseable.c 9 * @brief Dump unparseable objects to disk. 10 **/ 11 12 #define UNPARSEABLE_PRIVATE 13 14 #include "core/or/or.h" 15 #include "app/config/config.h" 16 #include "feature/dirparse/unparseable.h" 17 #include "lib/sandbox/sandbox.h" 18 19 #ifdef HAVE_SYS_STAT_H 20 #include <sys/stat.h> 21 #endif 22 23 /* Dump mechanism for unparseable descriptors */ 24 25 /** List of dumped descriptors for FIFO cleanup purposes */ 26 STATIC smartlist_t *descs_dumped = NULL; 27 /** Total size of dumped descriptors for FIFO cleanup */ 28 STATIC uint64_t len_descs_dumped = 0; 29 /** Directory to stash dumps in */ 30 static int have_dump_desc_dir = 0; 31 static int problem_with_dump_desc_dir = 0; 32 33 #define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs" 34 #define DESC_DUMP_BASE_FILENAME "unparseable-desc" 35 36 /** Find the dump directory and check if we'll be able to create it */ 37 void 38 dump_desc_init(void) 39 { 40 char *dump_desc_dir; 41 42 dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); 43 44 /* 45 * We just check for it, don't create it at this point; we'll 46 * create it when we need it if it isn't already there. 47 */ 48 if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) { 49 /* Error, log and flag it as having a problem */ 50 log_notice(LD_DIR, 51 "Doesn't look like we'll be able to create descriptor dump " 52 "directory %s; dumps will be disabled.", 53 dump_desc_dir); 54 problem_with_dump_desc_dir = 1; 55 tor_free(dump_desc_dir); 56 return; 57 } 58 59 /* Check if it exists */ 60 switch (file_status(dump_desc_dir)) { 61 case FN_DIR: 62 /* We already have a directory */ 63 have_dump_desc_dir = 1; 64 break; 65 case FN_NOENT: 66 /* Nothing, we'll need to create it later */ 67 have_dump_desc_dir = 0; 68 break; 69 case FN_ERROR: 70 /* Log and flag having a problem */ 71 log_notice(LD_DIR, 72 "Couldn't check whether descriptor dump directory %s already" 73 " exists: %s", 74 dump_desc_dir, strerror(errno)); 75 problem_with_dump_desc_dir = 1; 76 break; 77 case FN_FILE: 78 case FN_EMPTY: 79 default: 80 /* Something else was here! */ 81 log_notice(LD_DIR, 82 "Descriptor dump directory %s already exists and isn't a " 83 "directory", 84 dump_desc_dir); 85 problem_with_dump_desc_dir = 1; 86 } 87 88 if (have_dump_desc_dir && !problem_with_dump_desc_dir) { 89 dump_desc_populate_fifo_from_directory(dump_desc_dir); 90 } 91 92 tor_free(dump_desc_dir); 93 } 94 95 /** Create the dump directory if needed and possible */ 96 static void 97 dump_desc_create_dir(void) 98 { 99 char *dump_desc_dir; 100 101 /* If the problem flag is set, skip it */ 102 if (problem_with_dump_desc_dir) return; 103 104 /* Do we need it? */ 105 if (!have_dump_desc_dir) { 106 dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR); 107 108 if (check_private_dir(dump_desc_dir, CPD_CREATE, 109 get_options()->User) < 0) { 110 log_notice(LD_DIR, 111 "Failed to create descriptor dump directory %s", 112 dump_desc_dir); 113 problem_with_dump_desc_dir = 1; 114 } 115 116 /* Okay, we created it */ 117 have_dump_desc_dir = 1; 118 119 tor_free(dump_desc_dir); 120 } 121 } 122 123 /** Dump desc FIFO/cleanup; take ownership of the given filename, add it to 124 * the FIFO, and clean up the oldest entries to the extent they exceed the 125 * configured cap. If any old entries with a matching hash existed, they 126 * just got overwritten right before this was called and we should adjust 127 * the total size counter without deleting them. 128 */ 129 static void 130 dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256, 131 size_t len) 132 { 133 dumped_desc_t *ent = NULL, *tmp; 134 uint64_t max_len; 135 136 tor_assert(filename != NULL); 137 tor_assert(digest_sha256 != NULL); 138 139 if (descs_dumped == NULL) { 140 /* We better have no length, then */ 141 tor_assert(len_descs_dumped == 0); 142 /* Make a smartlist */ 143 descs_dumped = smartlist_new(); 144 } 145 146 /* Make a new entry to put this one in */ 147 ent = tor_malloc_zero(sizeof(*ent)); 148 ent->filename = filename; 149 ent->len = len; 150 ent->when = time(NULL); 151 memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN); 152 153 /* Do we need to do some cleanup? */ 154 max_len = get_options()->MaxUnparseableDescSizeToLog; 155 /* Iterate over the list until we've freed enough space */ 156 while (len > max_len - len_descs_dumped && 157 smartlist_len(descs_dumped) > 0) { 158 /* Get the oldest thing on the list */ 159 tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0)); 160 161 /* 162 * Check if it matches the filename we just added, so we don't delete 163 * something we just emitted if we get repeated identical descriptors. 164 */ 165 if (strcmp(tmp->filename, filename) != 0) { 166 /* Delete it and adjust the length counter */ 167 tor_unlink(tmp->filename); 168 tor_assert(len_descs_dumped >= tmp->len); 169 len_descs_dumped -= tmp->len; 170 log_info(LD_DIR, 171 "Deleting old unparseable descriptor dump %s due to " 172 "space limits", 173 tmp->filename); 174 } else { 175 /* 176 * Don't delete, but do adjust the counter since we will bump it 177 * later 178 */ 179 tor_assert(len_descs_dumped >= tmp->len); 180 len_descs_dumped -= tmp->len; 181 log_info(LD_DIR, 182 "Replacing old descriptor dump %s with new identical one", 183 tmp->filename); 184 } 185 186 /* Free it and remove it from the list */ 187 smartlist_del_keeporder(descs_dumped, 0); 188 tor_free(tmp->filename); 189 tor_free(tmp); 190 } 191 192 /* Append our entry to the end of the list and bump the counter */ 193 smartlist_add(descs_dumped, ent); 194 len_descs_dumped += len; 195 } 196 197 /** Check if we already have a descriptor for this hash and move it to the 198 * head of the queue if so. Return 1 if one existed and 0 otherwise. 199 */ 200 static int 201 dump_desc_fifo_bump_hash(const uint8_t *digest_sha256) 202 { 203 dumped_desc_t *match = NULL; 204 205 tor_assert(digest_sha256); 206 207 if (descs_dumped) { 208 /* Find a match if one exists */ 209 SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { 210 if (ent && 211 tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) { 212 /* 213 * Save a pointer to the match and remove it from its current 214 * position. 215 */ 216 match = ent; 217 SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent); 218 break; 219 } 220 } SMARTLIST_FOREACH_END(ent); 221 222 if (match) { 223 /* Update the timestamp */ 224 match->when = time(NULL); 225 /* Add it back at the end of the list */ 226 smartlist_add(descs_dumped, match); 227 228 /* Indicate we found one */ 229 return 1; 230 } 231 } 232 233 return 0; 234 } 235 236 /** Clean up on exit; just memory, leave the dumps behind 237 */ 238 void 239 dump_desc_fifo_cleanup(void) 240 { 241 if (descs_dumped) { 242 /* Free each descriptor */ 243 SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) { 244 tor_assert(ent); 245 tor_free(ent->filename); 246 tor_free(ent); 247 } SMARTLIST_FOREACH_END(ent); 248 /* Free the list */ 249 smartlist_free(descs_dumped); 250 descs_dumped = NULL; 251 len_descs_dumped = 0; 252 } 253 } 254 255 /** Handle one file for dump_desc_populate_fifo_from_directory(); make sure 256 * the filename is sensibly formed and matches the file content, and either 257 * return a dumped_desc_t for it or remove the file and return NULL. 258 */ 259 MOCK_IMPL(STATIC dumped_desc_t *, 260 dump_desc_populate_one_file, (const char *dirname, const char *f)) 261 { 262 dumped_desc_t *ent = NULL; 263 char *path = NULL, *desc = NULL; 264 const char *digest_str; 265 char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN]; 266 /* Expected prefix before digest in filenames */ 267 const char *f_pfx = DESC_DUMP_BASE_FILENAME "."; 268 /* 269 * Stat while reading; this is important in case the file 270 * contains a NUL character. 271 */ 272 struct stat st; 273 274 /* Sanity-check args */ 275 tor_assert(dirname != NULL); 276 tor_assert(f != NULL); 277 278 /* Form the full path */ 279 tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f); 280 281 /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */ 282 283 if (!strcmpstart(f, f_pfx)) { 284 /* It matches the form, but is the digest parseable as such? */ 285 digest_str = f + strlen(f_pfx); 286 if (base16_decode(digest, DIGEST256_LEN, 287 digest_str, strlen(digest_str)) != DIGEST256_LEN) { 288 /* We failed to decode it */ 289 digest_str = NULL; 290 } 291 } else { 292 /* No match */ 293 digest_str = NULL; 294 } 295 296 if (!digest_str) { 297 /* We couldn't get a sensible digest */ 298 log_notice(LD_DIR, 299 "Removing unrecognized filename %s from unparseable " 300 "descriptors directory", f); 301 tor_unlink(path); 302 /* We're done */ 303 goto done; 304 } 305 306 /* 307 * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and 308 * we've decoded the digest. Next, check that we can read it and the 309 * content matches this digest. We are relying on the fact that if the 310 * file contains a '\0', read_file_to_str() will allocate space for and 311 * read the entire file and return the correct size in st. 312 */ 313 desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st); 314 if (!desc) { 315 /* We couldn't read it */ 316 log_notice(LD_DIR, 317 "Failed to read %s from unparseable descriptors directory; " 318 "attempting to remove it.", f); 319 tor_unlink(path); 320 /* We're done */ 321 goto done; 322 } 323 324 #if SIZE_MAX > UINT64_MAX 325 if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) { 326 /* LCOV_EXCL_START 327 * Should be impossible since RFTS above should have failed to read the 328 * huge file into RAM. */ 329 goto done; 330 /* LCOV_EXCL_STOP */ 331 } 332 #endif /* SIZE_MAX > UINT64_MAX */ 333 if (BUG(st.st_size < 0)) { 334 /* LCOV_EXCL_START 335 * Should be impossible, since the OS isn't supposed to be b0rken. */ 336 goto done; 337 /* LCOV_EXCL_STOP */ 338 } 339 /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */ 340 341 /* 342 * We got one; now compute its digest and check that it matches the 343 * filename. 344 */ 345 if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size, 346 DIGEST_SHA256) < 0) { 347 /* Weird, but okay */ 348 log_info(LD_DIR, 349 "Unable to hash content of %s from unparseable descriptors " 350 "directory", f); 351 tor_unlink(path); 352 /* We're done */ 353 goto done; 354 } 355 356 /* Compare the digests */ 357 if (tor_memneq(digest, content_digest, DIGEST256_LEN)) { 358 /* No match */ 359 log_info(LD_DIR, 360 "Hash of %s from unparseable descriptors directory didn't " 361 "match its filename; removing it", f); 362 tor_unlink(path); 363 /* We're done */ 364 goto done; 365 } 366 367 /* Okay, it's a match, we should prepare ent */ 368 ent = tor_malloc_zero(sizeof(dumped_desc_t)); 369 ent->filename = path; 370 memcpy(ent->digest_sha256, digest, DIGEST256_LEN); 371 ent->len = (size_t) st.st_size; 372 ent->when = st.st_mtime; 373 /* Null out path so we don't free it out from under ent */ 374 path = NULL; 375 376 done: 377 /* Free allocations if we had them */ 378 tor_free(desc); 379 tor_free(path); 380 381 return ent; 382 } 383 384 /** Sort helper for dump_desc_populate_fifo_from_directory(); compares 385 * the when field of dumped_desc_ts in a smartlist to put the FIFO in 386 * the correct order after reconstructing it from the directory. 387 */ 388 static int 389 dump_desc_compare_fifo_entries(const void **a_v, const void **b_v) 390 { 391 const dumped_desc_t **a = (const dumped_desc_t **)a_v; 392 const dumped_desc_t **b = (const dumped_desc_t **)b_v; 393 394 if ((a != NULL) && (*a != NULL)) { 395 if ((b != NULL) && (*b != NULL)) { 396 /* We have sensible dumped_desc_ts to compare */ 397 if ((*a)->when < (*b)->when) { 398 return -1; 399 } else if ((*a)->when == (*b)->when) { 400 return 0; 401 } else { 402 return 1; 403 } 404 } else { 405 /* 406 * We shouldn't see this, but what the hell, NULLs precede everything 407 * else 408 */ 409 return 1; 410 } 411 } else { 412 return -1; 413 } 414 } 415 416 /** Scan the contents of the directory, and update FIFO/counters; this will 417 * consistency-check descriptor dump filenames against hashes of descriptor 418 * dump file content, and remove any inconsistent/unreadable dumps, and then 419 * reconstruct the dump FIFO as closely as possible for the last time the 420 * tor process shut down. If a previous dump was repeated more than once and 421 * moved ahead in the FIFO, the mtime will not have been updated and the 422 * reconstructed order will be wrong, but will always be a permutation of 423 * the original. 424 */ 425 STATIC void 426 dump_desc_populate_fifo_from_directory(const char *dirname) 427 { 428 smartlist_t *files = NULL; 429 dumped_desc_t *ent = NULL; 430 431 tor_assert(dirname != NULL); 432 433 /* Get a list of files */ 434 files = tor_listdir(dirname); 435 if (!files) { 436 log_notice(LD_DIR, 437 "Unable to get contents of unparseable descriptor dump " 438 "directory %s", 439 dirname); 440 return; 441 } 442 443 /* 444 * Iterate through the list and decide which files should go in the 445 * FIFO and which should be purged. 446 */ 447 448 SMARTLIST_FOREACH_BEGIN(files, char *, f) { 449 /* Try to get a FIFO entry */ 450 ent = dump_desc_populate_one_file(dirname, f); 451 if (ent) { 452 /* 453 * We got one; add it to the FIFO. No need for duplicate checking 454 * here since we just verified the name and digest match. 455 */ 456 457 /* Make sure we have a list to add it to */ 458 if (!descs_dumped) { 459 descs_dumped = smartlist_new(); 460 len_descs_dumped = 0; 461 } 462 463 /* Add it and adjust the counter */ 464 smartlist_add(descs_dumped, ent); 465 len_descs_dumped += ent->len; 466 } 467 /* 468 * If we didn't, we will have unlinked the file if necessary and 469 * possible, and emitted a log message about it, so just go on to 470 * the next. 471 */ 472 } SMARTLIST_FOREACH_END(f); 473 474 /* Did we get anything? */ 475 if (descs_dumped != NULL) { 476 /* Sort the FIFO in order of increasing timestamp */ 477 smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries); 478 479 /* Log some stats */ 480 log_info(LD_DIR, 481 "Reloaded unparseable descriptor dump FIFO with %d dump(s) " 482 "totaling %"PRIu64 " bytes", 483 smartlist_len(descs_dumped), (len_descs_dumped)); 484 } 485 486 /* Free the original list */ 487 SMARTLIST_FOREACH(files, char *, f, tor_free(f)); 488 smartlist_free(files); 489 } 490 491 /** For debugging purposes, dump unparseable descriptor *<b>desc</b> of 492 * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more 493 * than one descriptor to disk per minute. If there is already such a 494 * file in the data directory, overwrite it. */ 495 MOCK_IMPL(void, 496 dump_desc,(const char *desc, const char *type)) 497 { 498 tor_assert(desc); 499 tor_assert(type); 500 #ifndef TOR_UNIT_TESTS 501 /* For now, we are disabling this function, since it can be called with 502 * strings that are far too long. We can turn it back on if we fix it 503 * someday, but we'd need to give it a length argument. A likelier 504 * resolution here is simply to remove this module entirely. See tor#40286 505 * for background. */ 506 if (1) 507 return; 508 #endif 509 size_t len; 510 /* The SHA256 of the string */ 511 uint8_t digest_sha256[DIGEST256_LEN]; 512 char digest_sha256_hex[HEX_DIGEST256_LEN+1]; 513 /* Filename to log it to */ 514 char *debugfile, *debugfile_base; 515 516 /* Get the hash for logging purposes anyway */ 517 len = strlen(desc); 518 if (crypto_digest256((char *)digest_sha256, desc, len, 519 DIGEST_SHA256) < 0) { 520 log_info(LD_DIR, 521 "Unable to parse descriptor of type %s, and unable to even hash" 522 " it!", type); 523 goto err; 524 } 525 526 base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex), 527 (const char *)digest_sha256, sizeof(digest_sha256)); 528 529 /* 530 * We mention type and hash in the main log; don't clutter up the files 531 * with anything but the exact dump. 532 */ 533 tor_asprintf(&debugfile_base, 534 DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex); 535 debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base); 536 537 /* 538 * Check if the sandbox is active or will become active; see comment 539 * below at the log message for why. 540 */ 541 if (!(sandbox_is_active() || get_options()->Sandbox)) { 542 if (len <= get_options()->MaxUnparseableDescSizeToLog) { 543 if (!dump_desc_fifo_bump_hash(digest_sha256)) { 544 /* Create the directory if needed */ 545 dump_desc_create_dir(); 546 /* Make sure we've got it */ 547 if (have_dump_desc_dir && !problem_with_dump_desc_dir) { 548 /* Write it, and tell the main log about it */ 549 write_str_to_file(debugfile, desc, 1); 550 log_info(LD_DIR, 551 "Unable to parse descriptor of type %s with hash %s and " 552 "length %lu. See file %s in data directory for details.", 553 type, digest_sha256_hex, (unsigned long)len, 554 debugfile_base); 555 dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len); 556 /* Since we handed ownership over, don't free debugfile later */ 557 debugfile = NULL; 558 } else { 559 /* Problem with the subdirectory */ 560 log_info(LD_DIR, 561 "Unable to parse descriptor of type %s with hash %s and " 562 "length %lu. Descriptor not dumped because we had a " 563 "problem creating the " DESC_DUMP_DATADIR_SUBDIR 564 " subdirectory", 565 type, digest_sha256_hex, (unsigned long)len); 566 /* We do have to free debugfile in this case */ 567 } 568 } else { 569 /* We already had one with this hash dumped */ 570 log_info(LD_DIR, 571 "Unable to parse descriptor of type %s with hash %s and " 572 "length %lu. Descriptor not dumped because one with that " 573 "hash has already been dumped.", 574 type, digest_sha256_hex, (unsigned long)len); 575 /* We do have to free debugfile in this case */ 576 } 577 } else { 578 /* Just log that it happened without dumping */ 579 log_info(LD_DIR, 580 "Unable to parse descriptor of type %s with hash %s and " 581 "length %lu. Descriptor not dumped because it exceeds maximum" 582 " log size all by itself.", 583 type, digest_sha256_hex, (unsigned long)len); 584 /* We do have to free debugfile in this case */ 585 } 586 } else { 587 /* 588 * Not logging because the sandbox is active and seccomp2 apparently 589 * doesn't have a sensible way to allow filenames according to a pattern 590 * match. (If we ever figure out how to say "allow writes to /regex/", 591 * remove this checK). 592 */ 593 log_info(LD_DIR, 594 "Unable to parse descriptor of type %s with hash %s and " 595 "length %lu. Descriptor not dumped because the sandbox is " 596 "configured", 597 type, digest_sha256_hex, (unsigned long)len); 598 } 599 600 tor_free(debugfile_base); 601 tor_free(debugfile); 602 603 err: 604 return; 605 }