tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

unparseable.c (19244B)


      1 /* Copyright (c) 2001 Matej Pfajfar.
      2 * Copyright (c) 2001-2004, Roger Dingledine.
      3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
      4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
      5 /* See LICENSE for licensing information */
      6 
      7 /**
      8 * @file unparseable.c
      9 * @brief Dump unparseable objects to disk.
     10 **/
     11 
     12 #define UNPARSEABLE_PRIVATE
     13 
     14 #include "core/or/or.h"
     15 #include "app/config/config.h"
     16 #include "feature/dirparse/unparseable.h"
     17 #include "lib/sandbox/sandbox.h"
     18 
     19 #ifdef HAVE_SYS_STAT_H
     20 #include <sys/stat.h>
     21 #endif
     22 
     23 /* Dump mechanism for unparseable descriptors */
     24 
     25 /** List of dumped descriptors for FIFO cleanup purposes */
     26 STATIC smartlist_t *descs_dumped = NULL;
     27 /** Total size of dumped descriptors for FIFO cleanup */
     28 STATIC uint64_t len_descs_dumped = 0;
     29 /** Status flags for the dump directory: whether it exists, and whether
      * we hit a problem that disables dumping. */
     30 static int have_dump_desc_dir = 0;
     31 static int problem_with_dump_desc_dir = 0;
     32 
     33 #define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs"
     34 #define DESC_DUMP_BASE_FILENAME "unparseable-desc"
     35 
     36 /** Find the dump directory and check if we'll be able to create it */
     37 void
     38 dump_desc_init(void)
     39 {
     40  char *dump_desc_dir;
     41 
     42  dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
     43 
     44  /*
     45   * We just check for it, don't create it at this point; we'll
     46   * create it when we need it if it isn't already there.
     47   */
     48  if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) {
     49    /* Error, log and flag it as having a problem */
     50    log_notice(LD_DIR,
     51               "Doesn't look like we'll be able to create descriptor dump "
     52               "directory %s; dumps will be disabled.",
     53               dump_desc_dir);
     54    problem_with_dump_desc_dir = 1;
     55    tor_free(dump_desc_dir);
     56    return;
     57  }
     58 
     59  /* Check if it exists */
     60  switch (file_status(dump_desc_dir)) {
     61    case FN_DIR:
     62      /* We already have a directory */
     63      have_dump_desc_dir = 1;
     64      break;
     65    case FN_NOENT:
     66      /* Nothing, we'll need to create it later */
     67      have_dump_desc_dir = 0;
     68      break;
     69    case FN_ERROR:
     70      /* Log and flag having a problem */
     71      log_notice(LD_DIR,
     72                 "Couldn't check whether descriptor dump directory %s already"
     73                 " exists: %s",
     74                 dump_desc_dir, strerror(errno));
     75      problem_with_dump_desc_dir = 1;
     76      break;
     77    case FN_FILE:
     78    case FN_EMPTY:
     79    default:
     80      /* Something else was here! */
     81      log_notice(LD_DIR,
     82                 "Descriptor dump directory %s already exists and isn't a "
     83                 "directory",
     84                 dump_desc_dir);
     85      problem_with_dump_desc_dir = 1;
     86  }
     87 
     88  if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
     89    dump_desc_populate_fifo_from_directory(dump_desc_dir);
     90  }
     91 
     92  tor_free(dump_desc_dir);
     93 }
     94 
     95 /** Create the dump directory if needed and possible */
     96 static void
     97 dump_desc_create_dir(void)
     98 {
     99  char *dump_desc_dir;
    100 
    101  /* If the problem flag is set, skip it */
    102  if (problem_with_dump_desc_dir) return;
    103 
    104  /* Do we need it? */
    105  if (!have_dump_desc_dir) {
    106    dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
    107 
    108    if (check_private_dir(dump_desc_dir, CPD_CREATE,
    109                          get_options()->User) < 0) {
    110      log_notice(LD_DIR,
    111                 "Failed to create descriptor dump directory %s",
    112                 dump_desc_dir);
    113      problem_with_dump_desc_dir = 1;
    114    }
    115 
    116    /* Okay, we created it */
    117    have_dump_desc_dir = 1;
    118 
    119    tor_free(dump_desc_dir);
    120  }
    121 }
    122 
    123 /** Dump desc FIFO/cleanup; take ownership of the given filename, add it to
    124 * the FIFO, and clean up the oldest entries to the extent they exceed the
    125 * configured cap.  If any old entries with a matching hash existed, they
    126 * just got overwritten right before this was called and we should adjust
    127 * the total size counter without deleting them.
    128 */
    129 static void
    130 dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256,
    131                             size_t len)
    132 {
    133  dumped_desc_t *ent = NULL, *tmp;
    134  uint64_t max_len;
    135 
    136  tor_assert(filename != NULL);
    137  tor_assert(digest_sha256 != NULL);
    138 
    139  if (descs_dumped == NULL) {
    140    /* We better have no length, then */
    141    tor_assert(len_descs_dumped == 0);
    142    /* Make a smartlist */
    143    descs_dumped = smartlist_new();
    144  }
    145 
    146  /* Make a new entry to put this one in */
    147  ent = tor_malloc_zero(sizeof(*ent));
    148  ent->filename = filename;
    149  ent->len = len;
    150  ent->when = time(NULL);
    151  memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN);
    152 
    153  /* Do we need to do some cleanup? */
    154  max_len = get_options()->MaxUnparseableDescSizeToLog;
    155  /* Iterate over the list until we've freed enough space */
    156  while (len > max_len - len_descs_dumped &&
    157         smartlist_len(descs_dumped) > 0) {
    158    /* Get the oldest thing on the list */
    159    tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0));
    160 
    161    /*
    162     * Check if it matches the filename we just added, so we don't delete
    163     * something we just emitted if we get repeated identical descriptors.
    164     */
    165    if (strcmp(tmp->filename, filename) != 0) {
    166      /* Delete it and adjust the length counter */
    167      tor_unlink(tmp->filename);
    168      tor_assert(len_descs_dumped >= tmp->len);
    169      len_descs_dumped -= tmp->len;
    170      log_info(LD_DIR,
    171               "Deleting old unparseable descriptor dump %s due to "
    172               "space limits",
    173               tmp->filename);
    174    } else {
    175      /*
    176       * Don't delete, but do adjust the counter since we will bump it
    177       * later
    178       */
    179      tor_assert(len_descs_dumped >= tmp->len);
    180      len_descs_dumped -= tmp->len;
    181      log_info(LD_DIR,
    182               "Replacing old descriptor dump %s with new identical one",
    183               tmp->filename);
    184    }
    185 
    186    /* Free it and remove it from the list */
    187    smartlist_del_keeporder(descs_dumped, 0);
    188    tor_free(tmp->filename);
    189    tor_free(tmp);
    190  }
    191 
    192  /* Append our entry to the end of the list and bump the counter */
    193  smartlist_add(descs_dumped, ent);
    194  len_descs_dumped += len;
    195 }
    196 
    197 /** Check if we already have a descriptor for this hash and move it to the
    198 * head of the queue if so.  Return 1 if one existed and 0 otherwise.
    199 */
    200 static int
    201 dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
    202 {
    203  dumped_desc_t *match = NULL;
    204 
    205  tor_assert(digest_sha256);
    206 
    207  if (descs_dumped) {
    208    /* Find a match if one exists */
    209    SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
    210      if (ent &&
    211          tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) {
    212        /*
    213         * Save a pointer to the match and remove it from its current
    214         * position.
    215         */
    216        match = ent;
    217        SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent);
    218        break;
    219      }
    220    } SMARTLIST_FOREACH_END(ent);
    221 
    222    if (match) {
    223      /* Update the timestamp */
    224      match->when = time(NULL);
    225      /* Add it back at the end of the list */
    226      smartlist_add(descs_dumped, match);
    227 
    228      /* Indicate we found one */
    229      return 1;
    230    }
    231  }
    232 
    233  return 0;
    234 }
    235 
    236 /** Clean up on exit; just memory, leave the dumps behind
    237 */
    238 void
    239 dump_desc_fifo_cleanup(void)
    240 {
    241  if (descs_dumped) {
    242    /* Free each descriptor */
    243    SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
    244      tor_assert(ent);
    245      tor_free(ent->filename);
    246      tor_free(ent);
    247    } SMARTLIST_FOREACH_END(ent);
    248    /* Free the list */
    249    smartlist_free(descs_dumped);
    250    descs_dumped = NULL;
    251    len_descs_dumped = 0;
    252  }
    253 }
    254 
    255 /** Handle one file for dump_desc_populate_fifo_from_directory(); make sure
    256 * the filename is sensibly formed and matches the file content, and either
    257 * return a dumped_desc_t for it or remove the file and return NULL.
    258 */
    259 MOCK_IMPL(STATIC dumped_desc_t *,
    260 dump_desc_populate_one_file, (const char *dirname, const char *f))
    261 {
    262  dumped_desc_t *ent = NULL;
    263  char *path = NULL, *desc = NULL;
    264  const char *digest_str;
    265  char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN];
    266  /* Expected prefix before digest in filenames */
    267  const char *f_pfx = DESC_DUMP_BASE_FILENAME ".";
    268  /*
    269   * Stat while reading; this is important in case the file
    270   * contains a NUL character.
    271   */
    272  struct stat st;
    273 
    274  /* Sanity-check args */
    275  tor_assert(dirname != NULL);
    276  tor_assert(f != NULL);
    277 
    278  /* Form the full path */
    279  tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f);
    280 
    281  /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */
    282 
    283  if (!strcmpstart(f, f_pfx)) {
    284    /* It matches the form, but is the digest parseable as such? */
    285    digest_str = f + strlen(f_pfx);
    286    if (base16_decode(digest, DIGEST256_LEN,
    287                      digest_str, strlen(digest_str)) != DIGEST256_LEN) {
    288      /* We failed to decode it */
    289      digest_str = NULL;
    290    }
    291  } else {
    292    /* No match */
    293    digest_str = NULL;
    294  }
    295 
    296  if (!digest_str) {
    297    /* We couldn't get a sensible digest */
    298    log_notice(LD_DIR,
    299               "Removing unrecognized filename %s from unparseable "
    300               "descriptors directory", f);
    301    tor_unlink(path);
    302    /* We're done */
    303    goto done;
    304  }
    305 
    306  /*
    307   * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and
    308   * we've decoded the digest.  Next, check that we can read it and the
    309   * content matches this digest.  We are relying on the fact that if the
    310   * file contains a '\0', read_file_to_str() will allocate space for and
    311   * read the entire file and return the correct size in st.
    312   */
    313  desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st);
    314  if (!desc) {
    315    /* We couldn't read it */
    316    log_notice(LD_DIR,
    317               "Failed to read %s from unparseable descriptors directory; "
    318               "attempting to remove it.", f);
    319    tor_unlink(path);
    320    /* We're done */
    321    goto done;
    322  }
    323 
    324 #if SIZE_MAX > UINT64_MAX
    325  if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) {
    326    /* LCOV_EXCL_START
    327     * Should be impossible since RFTS above should have failed to read the
    328     * huge file into RAM. */
    329    goto done;
    330    /* LCOV_EXCL_STOP */
    331  }
    332 #endif /* SIZE_MAX > UINT64_MAX */
    333  if (BUG(st.st_size < 0)) {
    334    /* LCOV_EXCL_START
    335     * Should be impossible, since the OS isn't supposed to be b0rken. */
    336    goto done;
    337    /* LCOV_EXCL_STOP */
    338  }
    339  /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */
    340 
    341  /*
    342   * We got one; now compute its digest and check that it matches the
    343   * filename.
    344   */
    345  if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size,
    346                       DIGEST_SHA256) < 0) {
    347    /* Weird, but okay */
    348    log_info(LD_DIR,
    349             "Unable to hash content of %s from unparseable descriptors "
    350             "directory", f);
    351    tor_unlink(path);
    352    /* We're done */
    353    goto done;
    354  }
    355 
    356  /* Compare the digests */
    357  if (tor_memneq(digest, content_digest, DIGEST256_LEN)) {
    358    /* No match */
    359    log_info(LD_DIR,
    360             "Hash of %s from unparseable descriptors directory didn't "
    361             "match its filename; removing it", f);
    362    tor_unlink(path);
    363    /* We're done */
    364    goto done;
    365  }
    366 
    367  /* Okay, it's a match, we should prepare ent */
    368  ent = tor_malloc_zero(sizeof(dumped_desc_t));
    369  ent->filename = path;
    370  memcpy(ent->digest_sha256, digest, DIGEST256_LEN);
    371  ent->len = (size_t) st.st_size;
    372  ent->when = st.st_mtime;
    373  /* Null out path so we don't free it out from under ent */
    374  path = NULL;
    375 
    376 done:
    377  /* Free allocations if we had them */
    378  tor_free(desc);
    379  tor_free(path);
    380 
    381  return ent;
    382 }
    383 
    384 /** Sort helper for dump_desc_populate_fifo_from_directory(); compares
    385 * the when field of dumped_desc_ts in a smartlist to put the FIFO in
    386 * the correct order after reconstructing it from the directory.
    387 */
    388 static int
    389 dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
    390 {
    391  const dumped_desc_t **a = (const dumped_desc_t **)a_v;
    392  const dumped_desc_t **b = (const dumped_desc_t **)b_v;
    393 
    394  if ((a != NULL) && (*a != NULL)) {
    395    if ((b != NULL) && (*b != NULL)) {
    396      /* We have sensible dumped_desc_ts to compare */
    397      if ((*a)->when < (*b)->when) {
    398        return -1;
    399      } else if ((*a)->when == (*b)->when) {
    400        return 0;
    401      } else {
    402        return 1;
    403      }
    404    } else {
    405      /*
    406       * We shouldn't see this, but what the hell, NULLs precede everything
    407       * else
    408       */
    409      return 1;
    410    }
    411  } else {
    412    return -1;
    413  }
    414 }
    415 
    416 /** Scan the contents of the directory, and update FIFO/counters; this will
    417 * consistency-check descriptor dump filenames against hashes of descriptor
    418 * dump file content, and remove any inconsistent/unreadable dumps, and then
    419 * reconstruct the dump FIFO as closely as possible for the last time the
    420 * tor process shut down.  If a previous dump was repeated more than once and
    421 * moved ahead in the FIFO, the mtime will not have been updated and the
    422 * reconstructed order will be wrong, but will always be a permutation of
    423 * the original.
    424 */
    425 STATIC void
    426 dump_desc_populate_fifo_from_directory(const char *dirname)
    427 {
    428  smartlist_t *files = NULL;
    429  dumped_desc_t *ent = NULL;
    430 
    431  tor_assert(dirname != NULL);
    432 
    433  /* Get a list of files */
    434  files = tor_listdir(dirname);
    435  if (!files) {
    436    log_notice(LD_DIR,
    437               "Unable to get contents of unparseable descriptor dump "
    438               "directory %s",
    439               dirname);
    440    return;
    441  }
    442 
    443  /*
    444   * Iterate through the list and decide which files should go in the
    445   * FIFO and which should be purged.
    446   */
    447 
    448  SMARTLIST_FOREACH_BEGIN(files, char *, f) {
    449    /* Try to get a FIFO entry */
    450    ent = dump_desc_populate_one_file(dirname, f);
    451    if (ent) {
    452      /*
    453       * We got one; add it to the FIFO.  No need for duplicate checking
    454       * here since we just verified the name and digest match.
    455       */
    456 
    457      /* Make sure we have a list to add it to */
    458      if (!descs_dumped) {
    459        descs_dumped = smartlist_new();
    460        len_descs_dumped = 0;
    461      }
    462 
    463      /* Add it and adjust the counter */
    464      smartlist_add(descs_dumped, ent);
    465      len_descs_dumped += ent->len;
    466    }
    467    /*
    468     * If we didn't, we will have unlinked the file if necessary and
    469     * possible, and emitted a log message about it, so just go on to
    470     * the next.
    471     */
    472  } SMARTLIST_FOREACH_END(f);
    473 
    474  /* Did we get anything? */
    475  if (descs_dumped != NULL) {
    476    /* Sort the FIFO in order of increasing timestamp */
    477    smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries);
    478 
    479    /* Log some stats */
    480    log_info(LD_DIR,
    481             "Reloaded unparseable descriptor dump FIFO with %d dump(s) "
    482             "totaling %"PRIu64 " bytes",
    483             smartlist_len(descs_dumped), (len_descs_dumped));
    484  }
    485 
    486  /* Free the original list */
    487  SMARTLIST_FOREACH(files, char *, f, tor_free(f));
    488  smartlist_free(files);
    489 }
    490 
    491 /** For debugging purposes, dump unparseable descriptor *<b>desc</b> of
    492 * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more
    493 * than one descriptor to disk per minute. If there is already such a
    494 * file in the data directory, overwrite it. */
    495 MOCK_IMPL(void,
    496 dump_desc,(const char *desc, const char *type))
    497 {
    498  tor_assert(desc);
    499  tor_assert(type);
    500 #ifndef TOR_UNIT_TESTS
    501  /* For now, we are disabling this function, since it can be called with
    502   * strings that are far too long.  We can turn it back on if we fix it
    503   * someday, but we'd need to give it a length argument. A likelier
    504   * resolution here is simply to remove this module entirely.  See tor#40286
    505   * for background. */
    506  if (1)
    507    return;
    508 #endif
    509  size_t len;
    510  /* The SHA256 of the string */
    511  uint8_t digest_sha256[DIGEST256_LEN];
    512  char digest_sha256_hex[HEX_DIGEST256_LEN+1];
    513  /* Filename to log it to */
    514  char *debugfile, *debugfile_base;
    515 
    516  /* Get the hash for logging purposes anyway */
    517  len = strlen(desc);
    518  if (crypto_digest256((char *)digest_sha256, desc, len,
    519                       DIGEST_SHA256) < 0) {
    520    log_info(LD_DIR,
    521             "Unable to parse descriptor of type %s, and unable to even hash"
    522             " it!", type);
    523    goto err;
    524  }
    525 
    526  base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex),
    527                (const char *)digest_sha256, sizeof(digest_sha256));
    528 
    529  /*
    530   * We mention type and hash in the main log; don't clutter up the files
    531   * with anything but the exact dump.
    532   */
    533  tor_asprintf(&debugfile_base,
    534               DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex);
    535  debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base);
    536 
    537  /*
    538   * Check if the sandbox is active or will become active; see comment
    539   * below at the log message for why.
    540   */
    541  if (!(sandbox_is_active() || get_options()->Sandbox)) {
    542    if (len <= get_options()->MaxUnparseableDescSizeToLog) {
    543      if (!dump_desc_fifo_bump_hash(digest_sha256)) {
    544        /* Create the directory if needed */
    545        dump_desc_create_dir();
    546        /* Make sure we've got it */
    547        if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
    548          /* Write it, and tell the main log about it */
    549          write_str_to_file(debugfile, desc, 1);
    550          log_info(LD_DIR,
    551                   "Unable to parse descriptor of type %s with hash %s and "
    552                   "length %lu. See file %s in data directory for details.",
    553                   type, digest_sha256_hex, (unsigned long)len,
    554                   debugfile_base);
    555          dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len);
    556          /* Since we handed ownership over, don't free debugfile later */
    557          debugfile = NULL;
    558        } else {
    559          /* Problem with the subdirectory */
    560          log_info(LD_DIR,
    561                   "Unable to parse descriptor of type %s with hash %s and "
    562                   "length %lu. Descriptor not dumped because we had a "
    563                   "problem creating the " DESC_DUMP_DATADIR_SUBDIR
    564                   " subdirectory",
    565                   type, digest_sha256_hex, (unsigned long)len);
    566          /* We do have to free debugfile in this case */
    567        }
    568      } else {
    569        /* We already had one with this hash dumped */
    570        log_info(LD_DIR,
    571                 "Unable to parse descriptor of type %s with hash %s and "
    572                 "length %lu. Descriptor not dumped because one with that "
    573                 "hash has already been dumped.",
    574                 type, digest_sha256_hex, (unsigned long)len);
    575        /* We do have to free debugfile in this case */
    576      }
    577    } else {
    578      /* Just log that it happened without dumping */
    579      log_info(LD_DIR,
    580               "Unable to parse descriptor of type %s with hash %s and "
    581               "length %lu. Descriptor not dumped because it exceeds maximum"
    582               " log size all by itself.",
    583               type, digest_sha256_hex, (unsigned long)len);
    584      /* We do have to free debugfile in this case */
    585    }
    586  } else {
    587    /*
    588     * Not logging because the sandbox is active and seccomp2 apparently
    589     * doesn't have a sensible way to allow filenames according to a pattern
    590     * match.  (If we ever figure out how to say "allow writes to /regex/",
    591     * remove this checK).
    592     */
    593    log_info(LD_DIR,
    594             "Unable to parse descriptor of type %s with hash %s and "
    595             "length %lu. Descriptor not dumped because the sandbox is "
    596             "configured",
    597             type, digest_sha256_hex, (unsigned long)len);
    598  }
    599 
    600  tor_free(debugfile_base);
    601  tor_free(debugfile);
    602 
    603 err:
    604  return;
    605 }