tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

consdiff.c (45997B)


      1 /* Copyright (c) 2014, Daniel Martí
      2 * Copyright (c) 2014-2021, The Tor Project, Inc. */
      3 /* See LICENSE for licensing information */
      4 
      5 /**
      6 * \file consdiff.c
      7 * \brief Consensus diff implementation, including both the generation and the
      8 * application of diffs in a minimal ed format.
      9 *
     10 * The consensus diff application is done in consdiff_apply_diff, which relies
     11 * on apply_ed_diff for the main ed diff part and on some digest helper
     12 * functions to check the digest hashes found in the consensus diff header.
     13 *
     14 * The consensus diff generation is more complex. consdiff_gen_diff generates
     15 * it, relying on gen_ed_diff to generate the ed diff and some digest helper
     16 * functions to generate the digest hashes.
     17 *
     18 * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
     19 * time and linear space to generate an ed diff given two smartlists. As shown
     20 * in its comment section, calling calc_changes on the entire two consensuses
     21 * will calculate what is to be added and what is to be deleted in the diff.
     22 * Its comment section briefly explains how it works.
     23 *
     24 * In our case specific to consensuses, we take advantage of the fact that
     25 * consensuses list routers sorted by their identities. We use that
     26 * information to avoid running calc_changes on the whole smartlists.
     27 * gen_ed_diff will navigate through the two consensuses identity by identity
     28 * and will send small couples of slices to calc_changes, keeping the running
     29 * time near-linear. This is explained in more detail in the gen_ed_diff
     30 * comments.
     31 *
     32 * The allocation strategy tries to save time and memory by avoiding needless
     33 * copies.  Instead of actually splitting the inputs into separate strings, we
     34 * allocate cdline_t objects, each of which represents a line in the original
     35 * object or in the output.  We use memarea_t allocators to manage the
     36 * temporary memory we use when generating or applying diffs.
     37 **/
     38 
     39 #define CONSDIFF_PRIVATE
     40 
     41 #include "core/or/or.h"
     42 #include "feature/dircommon/consdiff.h"
     43 #include "lib/memarea/memarea.h"
     44 #include "feature/dirparse/ns_parse.h"
     45 
     46 static const char* ns_diff_version = "network-status-diff-version 1";
     47 static const char* hash_token = "hash";
     48 
     49 static char *consensus_join_lines(const smartlist_t *inp);
     50 
     51 /** Return true iff a and b have the same contents. */
     52 STATIC int
     53 lines_eq(const cdline_t *a, const cdline_t *b)
     54 {
     55  return a->len == b->len && fast_memeq(a->s, b->s, a->len);
     56 }
     57 
     58 /** Return true iff a has the same contents as the nul-terminated string b. */
     59 STATIC int
     60 line_str_eq(const cdline_t *a, const char *b)
     61 {
     62  const size_t len = strlen(b);
     63  tor_assert(len <= UINT32_MAX);
     64  cdline_t bline = { b, (uint32_t)len };
     65  return lines_eq(a, &bline);
     66 }
     67 
     68 /** Return true iff a begins with the same contents as the nul-terminated
     69 * string b. */
     70 static int
     71 line_starts_with_str(const cdline_t *a, const char *b)
     72 {
     73  const size_t len = strlen(b);
     74  tor_assert(len <= UINT32_MAX);
     75  return a->len >= len && fast_memeq(a->s, b, len);
     76 }
     77 
     78 /** Return a new cdline_t holding as its contents the nul-terminated
     79 * string s. Use the provided memory area for storage. */
     80 static cdline_t *
     81 cdline_linecpy(memarea_t *area, const char *s)
     82 {
     83  size_t len = strlen(s);
     84  const char *ss = memarea_memdup(area, s, len);
     85  cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
     86  line->s = ss;
     87  line->len = (uint32_t)len;
     88  return line;
     89 }
     90 
     91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
     92 * string s.  Use the provided memory area for storage. */
     93 STATIC void
     94 smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
     95 {
     96  smartlist_add(lst, cdline_linecpy(area, s));
     97 }
     98 
     99 /** Compute the digest of <b>cons</b>, and store the result in
    100 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
    101 /* This is a separate, mockable function so that we can override it when
    102 * fuzzing. */
    103 MOCK_IMPL(STATIC int,
    104 consensus_compute_digest,(const char *cons, size_t len,
    105                          consensus_digest_t *digest_out))
    106 {
    107  int r = crypto_digest256((char*)digest_out->sha3_256,
    108                           cons, len, DIGEST_SHA3_256);
    109  return r;
    110 }
    111 
    112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
    113 * <b>digest_out</b>. Return 0 on success, -1 on failure. */
    114 /* This is a separate, mockable function so that we can override it when
    115 * fuzzing. */
    116 MOCK_IMPL(STATIC int,
    117 consensus_compute_digest_as_signed,(const char *cons, size_t len,
    118                                    consensus_digest_t *digest_out))
    119 {
    120  return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
    121                                                    cons, len);
    122 }
    123 
    124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
    125 /* This is a separate, mockable function so that we can override it when
    126 * fuzzing. */
    127 MOCK_IMPL(STATIC int,
    128 consensus_digest_eq,(const uint8_t *d1,
    129                     const uint8_t *d2))
    130 {
    131  return fast_memeq(d1, d2, DIGEST256_LEN);
    132 }
    133 
    134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
    135 * and the end indexes are within the bounds of the initial smartlist. The end
    136 * element is not part of the resulting slice. If end is -1, the slice is to
    137 * reach the end of the smartlist.
    138 */
    139 STATIC smartlist_slice_t *
    140 smartlist_slice(const smartlist_t *list, int start, int end)
    141 {
    142  int list_len = smartlist_len(list);
    143  tor_assert(start >= 0);
    144  tor_assert(start <= list_len);
    145  if (end == -1) {
    146    end = list_len;
    147  }
    148  tor_assert(start <= end);
    149 
    150  smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
    151  slice->list = list;
    152  slice->offset = start;
    153  slice->len = end - start;
    154  return slice;
    155 }
    156 
    157 /** Helper: Compute the longest common subsequence lengths for the two slices.
    158 * Used as part of the diff generation to find the column at which to split
    159 * slice2 while still having the optimal solution.
    160 * If direction is -1, the navigation is reversed. Otherwise it must be 1.
    161 * The length of the resulting integer array is that of the second slice plus
    162 * one.
    163 */
    164 STATIC int *
    165 lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
    166            int direction)
    167 {
    168  size_t a_size = sizeof(int) * (slice2->len+1);
    169 
    170  /* Resulting lcs lengths. */
    171  int *result = tor_malloc_zero(a_size);
    172  /* Copy of the lcs lengths from the last iteration. */
    173  int *prev = tor_malloc(a_size);
    174 
    175  tor_assert(direction == 1 || direction == -1);
    176 
    177  int si = slice1->offset;
    178  if (direction == -1) {
    179    si += (slice1->len-1);
    180  }
    181 
    182  for (int i = 0; i < slice1->len; ++i, si+=direction) {
    183 
    184    const cdline_t *line1 = smartlist_get(slice1->list, si);
    185    /* Store the last results. */
    186    memcpy(prev, result, a_size);
    187 
    188    int sj = slice2->offset;
    189    if (direction == -1) {
    190      sj += (slice2->len-1);
    191    }
    192 
    193    for (int j = 0; j < slice2->len; ++j, sj+=direction) {
    194 
    195      const cdline_t *line2 = smartlist_get(slice2->list, sj);
    196      if (lines_eq(line1, line2)) {
    197        /* If the lines are equal, the lcs is one line longer. */
    198        result[j + 1] = prev[j] + 1;
    199      } else {
    200        /* If not, see what lcs parent path is longer. */
    201        result[j + 1] = MAX(result[j], prev[j + 1]);
    202      }
    203    }
    204  }
    205  tor_free(prev);
    206  return result;
    207 }
    208 
    209 /** Helper: Trim any number of lines that are equally at the start or the end
    210 * of both slices.
    211 */
    212 STATIC void
    213 trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
    214 {
    215  while (slice1->len>0 && slice2->len>0) {
    216    const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
    217    const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
    218    if (!lines_eq(line1, line2)) {
    219      break;
    220    }
    221    slice1->offset++; slice1->len--;
    222    slice2->offset++; slice2->len--;
    223  }
    224 
    225  int i1 = (slice1->offset+slice1->len)-1;
    226  int i2 = (slice2->offset+slice2->len)-1;
    227 
    228  while (slice1->len>0 && slice2->len>0) {
    229    const cdline_t *line1 = smartlist_get(slice1->list, i1);
    230    const cdline_t *line2 = smartlist_get(slice2->list, i2);
    231    if (!lines_eq(line1, line2)) {
    232      break;
    233    }
    234    i1--;
    235    slice1->len--;
    236    i2--;
    237    slice2->len--;
    238  }
    239 }
    240 
    241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
    242 * bounds of the slice.
    243 */
    244 STATIC int
    245 smartlist_slice_string_pos(const smartlist_slice_t *slice,
    246                           const cdline_t *string)
    247 {
    248  int end = slice->offset + slice->len;
    249  for (int i = slice->offset; i < end; ++i) {
    250    const cdline_t *el = smartlist_get(slice->list, i);
    251    if (lines_eq(el, string)) {
    252      return i;
    253    }
    254  }
    255  return -1;
    256 }
    257 
    258 /** Helper: Set all the appropriate changed booleans to true. The first slice
    259 * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
    260 * present in the other slice will be set to changed in their bool array.
    261 * The two changed bool arrays are passed in the same order as the slices.
    262 */
    263 STATIC void
    264 set_changed(bitarray_t *changed1, bitarray_t *changed2,
    265            const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
    266 {
    267  int toskip = -1;
    268  tor_assert(slice1->len == 0 || slice1->len == 1);
    269 
    270  if (slice1->len == 1) {
    271    const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
    272    toskip = smartlist_slice_string_pos(slice2, line_common);
    273    if (toskip == -1) {
    274      bitarray_set(changed1, slice1->offset);
    275    }
    276  }
    277  int end = slice2->offset + slice2->len;
    278  for (int i = slice2->offset; i < end; ++i) {
    279    if (i != toskip) {
    280      bitarray_set(changed2, i);
    281    }
    282  }
    283 }
    284 
    285 /*
    286 * Helper: Given that slice1 has been split by half into top and bot, we want
    287 * to fetch the column at which to split slice2 so that we are still on track
    288 * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
    289 * since the shortest diff is just another way to say the longest common
    290 * subsequence.
    291 */
    292 static int
    293 optimal_column_to_split(const smartlist_slice_t *top,
    294                        const smartlist_slice_t *bot,
    295                        const smartlist_slice_t *slice2)
    296 {
    297  int *lens_top = lcs_lengths(top, slice2, 1);
    298  int *lens_bot = lcs_lengths(bot, slice2, -1);
    299  int column=0, max_sum=-1;
    300 
    301  for (int i = 0; i < slice2->len+1; ++i) {
    302    int sum = lens_top[i] + lens_bot[slice2->len-i];
    303    if (sum > max_sum) {
    304      column = i;
    305      max_sum = sum;
    306    }
    307  }
    308  tor_free(lens_top);
    309  tor_free(lens_bot);
    310 
    311  return column;
    312 }
    313 
    314 /**
    315 * Helper: Figure out what elements are new or gone on the second smartlist
    316 * relative to the first smartlist, and store the booleans in the bitarrays.
    317 * True on the first bitarray means the element is gone, true on the second
    318 * bitarray means it's new.
    319 *
    320 * In its base case, either of the smartlists is of length <= 1 and we can
    321 * quickly see what elements are new or are gone. In the other case, we will
    322 * split one smartlist by half and we'll use optimal_column_to_split to find
    323 * the optimal column at which to split the second smartlist so that we are
    324 * finding the smallest diff possible.
    325 */
    326 STATIC void
    327 calc_changes(smartlist_slice_t *slice1,
    328             smartlist_slice_t *slice2,
    329             bitarray_t *changed1, bitarray_t *changed2)
    330 {
    331  trim_slices(slice1, slice2);
    332 
    333  if (slice1->len <= 1) {
    334    set_changed(changed1, changed2, slice1, slice2);
    335 
    336  } else if (slice2->len <= 1) {
    337    set_changed(changed2, changed1, slice2, slice1);
    338 
    339  /* Keep on splitting the slices in two. */
    340  } else {
    341    smartlist_slice_t *top, *bot, *left, *right;
    342 
    343    /* Split the first slice in half. */
    344    int mid = slice1->len/2;
    345    top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
    346    bot = smartlist_slice(slice1->list, slice1->offset+mid,
    347        slice1->offset+slice1->len);
    348 
    349    /* Split the second slice by the optimal column. */
    350    int mid2 = optimal_column_to_split(top, bot, slice2);
    351    left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
    352    right = smartlist_slice(slice2->list, slice2->offset+mid2,
    353        slice2->offset+slice2->len);
    354 
    355    calc_changes(top, left, changed1, changed2);
    356    calc_changes(bot, right, changed1, changed2);
    357    tor_free(top);
    358    tor_free(bot);
    359    tor_free(left);
    360    tor_free(right);
    361  }
    362 }
    363 
    364 /* This table is from crypto.c. The SP and PAD defines are different. */
    365 #define NOT_VALID_BASE64 255
    366 #define X NOT_VALID_BASE64
    367 #define SP NOT_VALID_BASE64
    368 #define PAD NOT_VALID_BASE64
    369 static const uint8_t base64_compare_table[256] = {
    370  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
    371  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    372  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
    373  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
    374  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    375  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
    376  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    377  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
    378  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    379  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    380  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    381  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    382  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    383  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    384  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    385  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    386 };
    387 
    388 /** Helper: Get the identity hash from a router line, assuming that the line
    389 * at least appears to be a router line and thus starts with "r ".
    390 *
    391 * If an identity hash is found, store it (without decoding it) in
    392 * <b>hash_out</b>, and return 0.  On failure, return -1.
    393 */
    394 STATIC int
    395 get_id_hash(const cdline_t *line, cdline_t *hash_out)
    396 {
    397  if (line->len < 2)
    398    return -1;
    399 
    400  /* Skip the router name. */
    401  const char *hash = memchr(line->s + 2, ' ', line->len - 2);
    402  if (!hash) {
    403    return -1;
    404  }
    405 
    406  hash++;
    407  const char *hash_end = hash;
    408  /* Stop when the first non-base64 character is found. Use unsigned chars to
    409   * avoid negative indexes causing crashes.
    410   */
    411  while (base64_compare_table[*((unsigned char*)hash_end)]
    412           != NOT_VALID_BASE64 &&
    413         hash_end < line->s + line->len) {
    414    hash_end++;
    415  }
    416 
    417  /* Empty hash. */
    418  if (hash_end == hash) {
    419    return -1;
    420  }
    421 
    422  hash_out->s = hash;
    423  /* Always true because lines are limited to this length */
    424  tor_assert(hash_end >= hash);
    425  tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
    426  hash_out->len = (uint32_t)(hash_end - hash);
    427 
    428  return 0;
    429 }
    430 
    431 /** Helper: Check that a line is a valid router entry. We must at least be
    432 * able to fetch a proper identity hash from it for it to be valid.
    433 */
    434 STATIC int
    435 is_valid_router_entry(const cdline_t *line)
    436 {
    437  if (line->len < 2 || fast_memneq(line->s, "r ", 2))
    438    return 0;
    439  cdline_t tmp;
    440  return (get_id_hash(line, &tmp) == 0);
    441 }
    442 
    443 /** Helper: Find the next router line starting at the current position.
    444 * Assumes that cur is lower than the length of the smartlist, i.e. it is a
    445 * line within the bounds of the consensus. The only exception is when we
    446 * don't want to skip the first line, in which case cur will be -1.
    447 */
    448 STATIC int
    449 next_router(const smartlist_t *cons, int cur)
    450 {
    451  int len = smartlist_len(cons);
    452  tor_assert(cur >= -1 && cur < len);
    453 
    454  if (++cur >= len) {
    455    return len;
    456  }
    457 
    458  const cdline_t *line = smartlist_get(cons, cur);
    459  while (!is_valid_router_entry(line)) {
    460    if (++cur >= len) {
    461      return len;
    462    }
    463    line = smartlist_get(cons, cur);
    464  }
    465  return cur;
    466 }
    467 
    468 /** Helper: compare two base64-encoded identity hashes, which may be of
    469 * different lengths. Comparison ends when the first non-base64 char is found.
    470 */
    471 STATIC int
    472 base64cmp(const cdline_t *hash1, const cdline_t *hash2)
    473 {
    474  /* NULL is always lower, useful for last_hash which starts at NULL. */
    475  if (!hash1->s && !hash2->s) {
    476    return 0;
    477  }
    478  if (!hash1->s) {
    479    return -1;
    480  }
    481  if (!hash2->s) {
    482    return 1;
    483  }
    484 
    485  /* Don't index with a char; char may be signed. */
    486  const unsigned char *a = (unsigned char*)hash1->s;
    487  const unsigned char *b = (unsigned char*)hash2->s;
    488  const unsigned char *a_end = a + hash1->len;
    489  const unsigned char *b_end = b + hash2->len;
    490  while (1) {
    491    uint8_t av = base64_compare_table[*a];
    492    uint8_t bv = base64_compare_table[*b];
    493    if (av == NOT_VALID_BASE64) {
    494      if (bv == NOT_VALID_BASE64) {
    495        /* Both ended with exactly the same characters. */
    496        return 0;
    497      } else {
    498        /* hash2 goes on longer than hash1 and thus hash1 is lower. */
    499        return -1;
    500      }
    501    } else if (bv == NOT_VALID_BASE64) {
    502      /* hash1 goes on longer than hash2 and thus hash1 is greater. */
    503      return 1;
    504    } else if (av < bv) {
    505      /* The first difference shows that hash1 is lower. */
    506      return -1;
    507    } else if (av > bv) {
    508      /* The first difference shows that hash1 is greater. */
    509      return 1;
    510    } else {
    511      a++;
    512      b++;
    513      if (a == a_end) {
    514        if (b == b_end) {
    515          return 0;
    516        } else {
    517          return -1;
    518        }
    519      } else if (b == b_end) {
    520        return 1;
    521      }
    522    }
    523  }
    524 }
    525 
    526 /** Structure used to remember the previous and current identity hash of
    527 * the "r " lines in a consensus, to enforce well-ordering. */
    528 typedef struct router_id_iterator_t {
    529  cdline_t last_hash;
    530  cdline_t hash;
    531 } router_id_iterator_t;
    532 
    533 #ifndef COCCI
    534 /**
    535 * Initializer for a router_id_iterator_t.
    536 */
    537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
    538 #endif /* !defined(COCCI) */
    539 
    540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
    541 * the index to the next router line ("r ...") in the consensus, or to
    542 * an index one after the end of the list if there is no such line.
    543 *
    544 * Use <b>iter</b> to record the hash of the found router line, if any,
    545 * and to enforce ordering on the hashes.  If the hashes are mis-ordered,
    546 * return -1.  Else, return 0.
    547 **/
    548 static int
    549 find_next_router_line(const smartlist_t *cons,
    550                      const char *consname,
    551                      int *idxp,
    552                      router_id_iterator_t *iter)
    553 {
    554  *idxp = next_router(cons, *idxp);
    555  if (*idxp < smartlist_len(cons)) {
    556    memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
    557    if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
    558        base64cmp(&iter->hash, &iter->last_hash) <= 0) {
    559      log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
    560               "the %s consensus doesn't have its router entries sorted "
    561               "properly.", consname);
    562      return -1;
    563    }
    564  }
    565  return 0;
    566 }
    567 
    568 /** Line-prefix indicating the beginning of the signatures section that we
    569 * intend to delete. */
    570 #define START_OF_SIGNATURES_SECTION "directory-signature "
    571 
    572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
    573 * to remove the signatures from it.  If the footer is removed, return a
    574 * cdline_t containing a delete command to delete the footer, allocated in
    575 * <b>area</b>.  If no footer is removed, return NULL.
    576 *
    577 * We remove the signatures here because they are not themselves signed, and
    578 * as such there might be different encodings for them.
    579 */
    580 static cdline_t *
    581 preprocess_consensus(memarea_t *area,
    582                     smartlist_t *cons)
    583 {
    584  int idx;
    585  int dirsig_idx = -1;
    586  for (idx = 0; idx < smartlist_len(cons); ++idx) {
    587    cdline_t *line = smartlist_get(cons, idx);
    588    if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
    589      dirsig_idx = idx;
    590      break;
    591    }
    592  }
    593  if (dirsig_idx >= 0) {
    594    char buf[64];
    595    while (smartlist_len(cons) > dirsig_idx)
    596      smartlist_del(cons, dirsig_idx);
    597    tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
    598    return cdline_linecpy(area, buf);
    599  } else {
    600    return NULL;
    601  }
    602 }
    603 
    604 /** Generate an ed diff as a smartlist from two consensuses, also given as
    605 * smartlists. Will return NULL if the diff could not be generated, which can
    606 * happen if any lines the script had to add matched "." or if the routers
    607 * were not properly ordered.
    608 *
    609 * All cdline_t objects in the resulting object are either references to lines
    610 * in one of the inputs, or are newly allocated lines in the provided memarea.
    611 *
    612 * This implementation is consensus-specific. To generate an ed diff for any
    613 * given input in quadratic time, you can replace all the code until the
    614 * navigation in reverse order with the following:
    615 *
    616 *   int len1 = smartlist_len(cons1);
    617 *   int len2 = smartlist_len(cons2);
    618 *   bitarray_t *changed1 = bitarray_init_zero(len1);
    619 *   bitarray_t *changed2 = bitarray_init_zero(len2);
    620 *   cons1_sl = smartlist_slice(cons1, 0, -1);
    621 *   cons2_sl = smartlist_slice(cons2, 0, -1);
    622 *   calc_changes(cons1_sl, cons2_sl, changed1, changed2);
    623 */
    624 STATIC smartlist_t *
    625 gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
    626            memarea_t *area)
    627 {
    628  smartlist_t *cons1 = smartlist_new();
    629  smartlist_add_all(cons1, cons1_orig);
    630  cdline_t *remove_trailer = preprocess_consensus(area, cons1);
    631 
    632  int len1 = smartlist_len(cons1);
    633  int len2 = smartlist_len(cons2);
    634  smartlist_t *result = smartlist_new();
    635 
    636  if (remove_trailer) {
    637    /* There's a delete-the-trailer line at the end, so add it here. */
    638    smartlist_add(result, remove_trailer);
    639  }
    640 
    641  /* Initialize the changed bitarrays to zero, so that calc_changes only needs
    642   * to set the ones that matter and leave the rest untouched.
    643   */
    644  bitarray_t *changed1 = bitarray_init_zero(len1);
    645  bitarray_t *changed2 = bitarray_init_zero(len2);
    646  int i1=-1, i2=-1;
    647  int start1=0, start2=0;
    648 
    649  /* To check that hashes are ordered properly */
    650  router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
    651  router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
    652 
    653  /* i1 and i2 are initialized at the first line of each consensus. They never
    654   * reach past len1 and len2 respectively, since next_router doesn't let that
    655   * happen. i1 and i2 are advanced by at least one line at each iteration as
    656   * long as they have not yet reached len1 and len2, so the loop is
    657   * guaranteed to end, and each pair of (i1,i2) will be inspected at most
    658   * once.
    659   */
    660  while (i1 < len1 || i2 < len2) {
    661 
    662    /* Advance each of the two navigation positions by one router entry if not
    663     * yet at the end.
    664     */
    665    if (i1 < len1) {
    666      if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
    667        goto error_cleanup;
    668      }
    669    }
    670 
    671    if (i2 < len2) {
    672      if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
    673        goto error_cleanup;
    674      }
    675    }
    676 
    677    /* If we have reached the end of both consensuses, there is no need to
    678     * compare hashes anymore, since this is the last iteration.
    679     */
    680    if (i1 < len1 || i2 < len2) {
    681 
    682      /* Keep on advancing the lower (by identity hash sorting) position until
    683       * we have two matching positions. The only other possible outcome is
    684       * that a lower position reaches the end of the consensus before it can
    685       * reach a hash that is no longer the lower one. Since there will always
    686       * be a lower hash for as long as the loop runs, one of the two indexes
    687       * will always be incremented, thus assuring that the loop must end
    688       * after a finite number of iterations. If that cannot be because said
    689       * consensus has already reached the end, both are extended to their
    690       * respecting ends since we are done.
    691       */
    692      int cmp = base64cmp(&iter1.hash, &iter2.hash);
    693      while (cmp != 0) {
    694        if (i1 < len1 && cmp < 0) {
    695          if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
    696            goto error_cleanup;
    697          }
    698          if (i1 == len1) {
    699            /* We finished the first consensus, so grab all the remaining
    700             * lines of the second consensus and finish up.
    701             */
    702            i2 = len2;
    703            break;
    704          }
    705        } else if (i2 < len2 && cmp > 0) {
    706          if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
    707            goto error_cleanup;
    708          }
    709          if (i2 == len2) {
    710            /* We finished the second consensus, so grab all the remaining
    711             * lines of the first consensus and finish up.
    712             */
    713            i1 = len1;
    714            break;
    715          }
    716        } else {
    717          i1 = len1;
    718          i2 = len2;
    719          break;
    720        }
    721        cmp = base64cmp(&iter1.hash, &iter2.hash);
    722      }
    723    }
    724 
    725    /* Make slices out of these chunks (up to the common router entry) and
    726     * calculate the changes for them.
    727     * Error if any of the two slices are longer than 10K lines. That should
    728     * never happen with any pair of real consensuses. Feeding more than 10K
    729     * lines to calc_changes would be very slow anyway.
    730     */
    731 #define MAX_LINE_COUNT (10000)
    732    if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
    733      log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
    734          "we found too few common router ids.");
    735      goto error_cleanup;
    736    }
    737 
    738    smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
    739    smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
    740    calc_changes(cons1_sl, cons2_sl, changed1, changed2);
    741    tor_free(cons1_sl);
    742    tor_free(cons2_sl);
    743    start1 = i1, start2 = i2;
    744  }
    745 
    746  /* Navigate the changes in reverse order and generate one ed command for
    747   * each chunk of changes.
    748   */
    749  i1=len1-1, i2=len2-1;
    750  char buf[128];
    751  while (i1 >= 0 || i2 >= 0) {
    752 
    753    int start1x, start2x, end1, end2, added, deleted;
    754 
    755    /* We are at a point were no changed bools are true, so just keep going. */
    756    if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
    757        !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
    758      if (i1 >= 0) {
    759        i1--;
    760      }
    761      if (i2 >= 0) {
    762        i2--;
    763      }
    764      continue;
    765    }
    766 
    767    end1 = i1, end2 = i2;
    768 
    769    /* Grab all contiguous changed lines */
    770    while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
    771      i1--;
    772    }
    773    while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
    774      i2--;
    775    }
    776 
    777    start1x = i1+1, start2x = i2+1;
    778    added = end2-i2, deleted = end1-i1;
    779 
    780    if (added == 0) {
    781      if (deleted == 1) {
    782        tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
    783        smartlist_add_linecpy(result, area, buf);
    784      } else {
    785        tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
    786        smartlist_add_linecpy(result, area, buf);
    787      }
    788    } else {
    789      int i;
    790      if (deleted == 0) {
    791        tor_snprintf(buf, sizeof(buf), "%ia", start1x);
    792        smartlist_add_linecpy(result, area, buf);
    793      } else if (deleted == 1) {
    794        tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
    795        smartlist_add_linecpy(result, area, buf);
    796      } else {
    797        tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
    798        smartlist_add_linecpy(result, area, buf);
    799      }
    800 
    801      for (i = start2x; i <= end2; ++i) {
    802        cdline_t *line = smartlist_get(cons2, i);
    803        if (line_str_eq(line, ".")) {
    804          log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
    805              "one of the lines to be added is \".\".");
    806          goto error_cleanup;
    807        }
    808        smartlist_add(result, line);
    809      }
    810      smartlist_add_linecpy(result, area, ".");
    811    }
    812  }
    813 
    814  smartlist_free(cons1);
    815  bitarray_free(changed1);
    816  bitarray_free(changed2);
    817 
    818  return result;
    819 
    820 error_cleanup:
    821 
    822  smartlist_free(cons1);
    823  bitarray_free(changed1);
    824  bitarray_free(changed2);
    825 
    826  smartlist_free(result);
    827 
    828  return NULL;
    829 }
    830 
    831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
    832 * store it in <b>num_out</b>.  Advance <b>s</b> to the character immediately
    833 * after the number.  Return 0 on success, -1 on failure. */
    834 static int
    835 get_linenum(const char **s, int *num_out)
    836 {
    837  int ok;
    838  char *next;
    839  if (!TOR_ISDIGIT(**s)) {
    840    return -1;
    841  }
    842  *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
    843  if (ok && next) {
    844    *s = next;
    845    return 0;
    846  } else {
    847    return -1;
    848  }
    849 }
    850 
    851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
    852 * and return a new consensus, also as a line-based smartlist. Will return
    853 * NULL if the ed diff is not properly formatted.
    854 *
    855 * All cdline_t objects in the resulting object are references to lines
    856 * in one of the inputs; nothing is copied.
    857 */
    858 STATIC smartlist_t *
    859 apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
    860              int diff_starting_line)
    861 {
    862  int diff_len = smartlist_len(diff);
    863  int j = smartlist_len(cons1);
    864  smartlist_t *cons2 = smartlist_new();
    865 
    866  for (int i=diff_starting_line; i<diff_len; ++i) {
    867    const cdline_t *diff_cdline = smartlist_get(diff, i);
    868    char diff_line[128];
    869 
    870    if (diff_cdline->len > sizeof(diff_line) - 1) {
    871      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    872               "an ed command was far too long");
    873      goto error_cleanup;
    874    }
    875    /* Copy the line to make it nul-terminated. */
    876    memcpy(diff_line, diff_cdline->s, diff_cdline->len);
    877    diff_line[diff_cdline->len] = 0;
    878    const char *ptr = diff_line;
    879    int start = 0, end = 0;
    880    int had_range = 0;
    881    int end_was_eof = 0;
    882    if (get_linenum(&ptr, &start) < 0) {
    883      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    884               "an ed command was missing a line number.");
    885      goto error_cleanup;
    886    }
    887    if (*ptr == ',') {
    888      /* Two-item range */
    889      had_range = 1;
    890      ++ptr;
    891      if (*ptr == '$') {
    892        end_was_eof = 1;
    893        end = smartlist_len(cons1);
    894        ++ptr;
    895      } else if (get_linenum(&ptr, &end) < 0) {
    896        log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    897                 "an ed command was missing a range end line number.");
    898        goto error_cleanup;
    899      }
    900      /* Incoherent range. */
    901      if (end <= start) {
    902        log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    903                 "an invalid range was found in an ed command.");
    904        goto error_cleanup;
    905      }
    906    } else {
    907      /* We'll take <n1> as <n1>,<n1> for simplicity. */
    908      end = start;
    909    }
    910 
    911    if (end > j) {
    912      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    913          "its commands are not properly sorted in reverse order.");
    914      goto error_cleanup;
    915    }
    916 
    917    if (*ptr == '\0') {
    918      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    919               "a line with no ed command was found");
    920      goto error_cleanup;
    921    }
    922 
    923    if (*(ptr+1) != '\0') {
    924      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    925          "an ed command longer than one char was found.");
    926      goto error_cleanup;
    927    }
    928 
    929    char action = *ptr;
    930 
    931    switch (action) {
    932      case 'a':
    933      case 'c':
    934      case 'd':
    935        break;
    936      default:
    937        log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    938            "an unrecognised ed command was found.");
    939        goto error_cleanup;
    940    }
    941 
    942    /** $ is not allowed with non-d actions. */
    943    if (end_was_eof && action != 'd') {
    944      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    945               "it wanted to use $ with a command other than delete");
    946      goto error_cleanup;
    947    }
    948 
    949    /* 'a' commands are not allowed to have ranges. */
    950    if (had_range && action == 'a') {
    951      log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    952          "it wanted to add lines after a range.");
    953      goto error_cleanup;
    954    }
    955 
    956    /* Add unchanged lines. */
    957    for (; j && j > end; --j) {
    958      cdline_t *cons_line = smartlist_get(cons1, j-1);
    959      smartlist_add(cons2, cons_line);
    960    }
    961 
    962    /* Ignore removed lines. */
    963    if (action == 'c' || action == 'd') {
    964      while (--j >= start) {
    965        /* Skip line */
    966      }
    967    }
    968 
    969    /* Add new lines in reverse order, since it will all be reversed at the
    970     * end.
    971     */
    972    if (action == 'a' || action == 'c') {
    973      int added_end = i;
    974 
    975      i++; /* Skip the line with the range and command. */
    976      while (i < diff_len) {
    977        if (line_str_eq(smartlist_get(diff, i), ".")) {
    978          break;
    979        }
    980        if (++i == diff_len) {
    981          log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    982              "it has lines to be inserted that don't end with a \".\".");
    983          goto error_cleanup;
    984        }
    985      }
    986 
    987      int added_i = i-1;
    988 
    989      /* It would make no sense to add zero new lines. */
    990      if (added_i == added_end) {
    991        log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
    992            "it has an ed command that tries to insert zero lines.");
    993        goto error_cleanup;
    994      }
    995 
    996      while (added_i > added_end) {
    997        cdline_t *added_line = smartlist_get(diff, added_i--);
    998        smartlist_add(cons2, added_line);
    999      }
   1000    }
   1001  }
   1002 
   1003  /* Add remaining unchanged lines. */
   1004  for (; j > 0; --j) {
   1005    cdline_t *cons_line = smartlist_get(cons1, j-1);
   1006    smartlist_add(cons2, cons_line);
   1007  }
   1008 
   1009  /* Reverse the whole thing since we did it from the end. */
   1010  smartlist_reverse(cons2);
   1011  return cons2;
   1012 
   1013 error_cleanup:
   1014 
   1015  smartlist_free(cons2);
   1016 
   1017  return NULL;
   1018 }
   1019 
   1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
   1021 * as smartlists. Will return NULL if the consensus diff could not be
   1022 * generated. Neither of the two consensuses are modified in any way, so it's
   1023 * up to the caller to free their resources.
   1024 */
   1025 smartlist_t *
   1026 consdiff_gen_diff(const smartlist_t *cons1,
   1027                  const smartlist_t *cons2,
   1028                  const consensus_digest_t *digests1,
   1029                  const consensus_digest_t *digests2,
   1030                  memarea_t *area)
   1031 {
   1032  smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
   1033  /* ed diff could not be generated - reason already logged by gen_ed_diff. */
   1034  if (!ed_diff) {
   1035    goto error_cleanup;
   1036  }
   1037 
   1038  /* See that the script actually produces what we want. */
   1039  smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
   1040  if (!ed_cons2) {
   1041    /* LCOV_EXCL_START -- impossible if diff generation is correct */
   1042    log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
   1043        "the generated ed diff could not be tested to successfully generate "
   1044        "the target consensus.");
   1045    goto error_cleanup;
   1046    /* LCOV_EXCL_STOP */
   1047  }
   1048 
   1049  int cons2_eq = 1;
   1050  if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
   1051    SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
   1052      const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
   1053      if (!lines_eq(line1, line2)) {
   1054        cons2_eq = 0;
   1055        break;
   1056      }
   1057    } SMARTLIST_FOREACH_END(line1);
   1058  } else {
   1059    cons2_eq = 0;
   1060  }
   1061  smartlist_free(ed_cons2);
   1062  if (!cons2_eq) {
   1063    /* LCOV_EXCL_START -- impossible if diff generation is correct. */
   1064    log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
   1065        "the generated ed diff did not generate the target consensus "
   1066        "successfully when tested.");
   1067    goto error_cleanup;
   1068    /* LCOV_EXCL_STOP */
   1069  }
   1070 
   1071  char cons1_hash_hex[HEX_DIGEST256_LEN+1];
   1072  char cons2_hash_hex[HEX_DIGEST256_LEN+1];
   1073  base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
   1074                (const char*)digests1->sha3_256, DIGEST256_LEN);
   1075  base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
   1076                (const char*)digests2->sha3_256, DIGEST256_LEN);
   1077 
   1078  /* Create the resulting consensus diff. */
   1079  char buf[160];
   1080  smartlist_t *result = smartlist_new();
   1081  tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
   1082  smartlist_add_linecpy(result, area, buf);
   1083  tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
   1084      cons1_hash_hex, cons2_hash_hex);
   1085  smartlist_add_linecpy(result, area, buf);
   1086  smartlist_add_all(result, ed_diff);
   1087  smartlist_free(ed_diff);
   1088  return result;
   1089 
   1090 error_cleanup:
   1091 
   1092  if (ed_diff) {
   1093    /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
   1094    smartlist_free(ed_diff);
   1095    /* LCOV_EXCL_STOP */
   1096  }
   1097 
   1098  return NULL;
   1099 }
   1100 
   1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
   1102 * base16 as found in the diff itself. digest1_out and digest2_out must be of
   1103 * length DIGEST256_LEN or larger if not NULL.
   1104 */
   1105 int
   1106 consdiff_get_digests(const smartlist_t *diff,
   1107                     char *digest1_out,
   1108                     char *digest2_out)
   1109 {
   1110  smartlist_t *hash_words = NULL;
   1111  const cdline_t *format;
   1112  char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
   1113  char *cons1_hash_hex, *cons2_hash_hex;
   1114  if (smartlist_len(diff) < 2) {
   1115    log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
   1116    goto error_cleanup;
   1117  }
   1118 
   1119  /* Check that it's the format and version we know. */
   1120  format = smartlist_get(diff, 0);
   1121  if (!line_str_eq(format, ns_diff_version)) {
   1122    log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
   1123    goto error_cleanup;
   1124  }
   1125 
   1126  /* Grab the base16 digests. */
   1127  hash_words = smartlist_new();
   1128  {
   1129    const cdline_t *line2 = smartlist_get(diff, 1);
   1130    char *h = tor_memdup_nulterm(line2->s, line2->len);
   1131    smartlist_split_string(hash_words, h, " ", 0, 4);
   1132    tor_free(h);
   1133  }
   1134 
   1135  /* There have to be three words, the first of which must be hash_token. */
   1136  if (smartlist_len(hash_words) != 3 ||
   1137      strcmp(smartlist_get(hash_words, 0), hash_token)) {
   1138    log_info(LD_CONSDIFF, "The provided consensus diff does not include "
   1139        "the necessary digests.");
   1140    goto error_cleanup;
   1141  }
   1142 
   1143  /* Expected hashes as found in the consensus diff header. They must be of
   1144   * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
   1145   * If any of the decodings fail, error to make sure that the hashes are
   1146   * proper base16-encoded digests.
   1147   */
   1148  cons1_hash_hex = smartlist_get(hash_words, 1);
   1149  cons2_hash_hex = smartlist_get(hash_words, 2);
   1150  if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
   1151      strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
   1152    log_info(LD_CONSDIFF, "The provided consensus diff includes "
   1153        "base16-encoded digests of incorrect size.");
   1154    goto error_cleanup;
   1155  }
   1156 
   1157  if (base16_decode(cons1_hash, DIGEST256_LEN,
   1158          cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
   1159      base16_decode(cons2_hash, DIGEST256_LEN,
   1160          cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
   1161    log_info(LD_CONSDIFF, "The provided consensus diff includes "
   1162        "malformed digests.");
   1163    goto error_cleanup;
   1164  }
   1165 
   1166  if (digest1_out) {
   1167    memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
   1168  }
   1169  if (digest2_out) {
   1170    memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
   1171  }
   1172 
   1173  SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
   1174  smartlist_free(hash_words);
   1175  return 0;
   1176 
   1177 error_cleanup:
   1178 
   1179  if (hash_words) {
   1180    SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
   1181    smartlist_free(hash_words);
   1182  }
   1183  return 1;
   1184 }
   1185 
   1186 /** Apply the consensus diff to the given consensus and return a new
   1187 * consensus, also as a line-based smartlist. Will return NULL if the diff
   1188 * could not be applied. Neither the consensus nor the diff are modified in
   1189 * any way, so it's up to the caller to free their resources.
   1190 */
   1191 char *
   1192 consdiff_apply_diff(const smartlist_t *cons1,
   1193                    const smartlist_t *diff,
   1194                    const consensus_digest_t *digests1)
   1195 {
   1196  smartlist_t *cons2 = NULL;
   1197  char *cons2_str = NULL;
   1198  char e_cons1_hash[DIGEST256_LEN];
   1199  char e_cons2_hash[DIGEST256_LEN];
   1200 
   1201  if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
   1202    goto error_cleanup;
   1203  }
   1204 
   1205  /* See that the consensus that was given to us matches its hash. */
   1206  if (!consensus_digest_eq(digests1->sha3_256,
   1207                           (const uint8_t*)e_cons1_hash)) {
   1208    char hex_digest1[HEX_DIGEST256_LEN+1];
   1209    char e_hex_digest1[HEX_DIGEST256_LEN+1];
   1210    log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
   1211        "the base consensus doesn't match the digest as found in "
   1212        "the consensus diff header.");
   1213    base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
   1214                  (const char *)digests1->sha3_256, DIGEST256_LEN);
   1215    base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
   1216                  e_cons1_hash, DIGEST256_LEN);
   1217    log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
   1218             hex_digest1, e_hex_digest1);
   1219    goto error_cleanup;
   1220  }
   1221 
   1222  /* Grab the ed diff and calculate the resulting consensus. */
   1223  /* Skip the first two lines. */
   1224  cons2 = apply_ed_diff(cons1, diff, 2);
   1225 
   1226  /* ed diff could not be applied - reason already logged by apply_ed_diff. */
   1227  if (!cons2) {
   1228    goto error_cleanup;
   1229  }
   1230 
   1231  cons2_str = consensus_join_lines(cons2);
   1232 
   1233  consensus_digest_t cons2_digests;
   1234  if (consensus_compute_digest(cons2_str, strlen(cons2_str),
   1235                               &cons2_digests) < 0) {
   1236    /* LCOV_EXCL_START -- digest can't fail */
   1237    log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
   1238        "resulting from applying a consensus diff.");
   1239    goto error_cleanup;
   1240    /* LCOV_EXCL_STOP */
   1241  }
   1242 
   1243  /* See that the resulting consensus matches its hash. */
   1244  if (!consensus_digest_eq(cons2_digests.sha3_256,
   1245                           (const uint8_t*)e_cons2_hash)) {
   1246    log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
   1247        "the resulting consensus doesn't match the digest as found in "
   1248        "the consensus diff header.");
   1249    char hex_digest2[HEX_DIGEST256_LEN+1];
   1250    char e_hex_digest2[HEX_DIGEST256_LEN+1];
   1251    base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
   1252        (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
   1253    base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
   1254        e_cons2_hash, DIGEST256_LEN);
   1255    log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
   1256             hex_digest2, e_hex_digest2);
   1257    goto error_cleanup;
   1258  }
   1259 
   1260  goto done;
   1261 
   1262 error_cleanup:
   1263  tor_free(cons2_str); /* Sets it to NULL */
   1264 
   1265 done:
   1266  if (cons2) {
   1267    smartlist_free(cons2);
   1268  }
   1269 
   1270  return cons2_str;
   1271 }
   1272 
   1273 /** Any consensus line longer than this means that the input is invalid. */
   1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
   1275 
   1276 /**
   1277 * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
   1278 * that line (without trailing newline) to <b>out</b>.  Return -1 if there are
   1279 * any non-NL terminated lines; 0 otherwise.
   1280 *
   1281 * Unlike tor_split_lines, this function avoids ambiguity on its
   1282 * handling of a final line that isn't NL-terminated.
   1283 *
   1284 * All cdline_t objects are allocated in the provided memarea.  Strings
   1285 * are not copied: if <b>s</b> changes or becomes invalid, then all
   1286 * generated cdlines will become invalid.
   1287 */
   1288 STATIC int
   1289 consensus_split_lines(smartlist_t *out,
   1290                      const char *s, size_t len,
   1291                      memarea_t *area)
   1292 {
   1293  const char *end_of_str = s + len;
   1294 
   1295  while (s < end_of_str) {
   1296    const char *eol = memchr(s, '\n', end_of_str - s);
   1297    if (!eol) {
   1298      /* File doesn't end with newline. */
   1299      return -1;
   1300    }
   1301    if (eol - s > CONSENSUS_LINE_MAX_LEN) {
   1302      /* Line is far too long. */
   1303      return -1;
   1304    }
   1305    cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
   1306    line->s = s;
   1307    line->len = (uint32_t)(eol - s);
   1308    smartlist_add(out, line);
   1309    s = eol+1;
   1310  }
   1311  return 0;
   1312 }
   1313 
   1314 /** Given a list of cdline_t, return a newly allocated string containing
   1315 * all of the lines, terminated with NL, concatenated.
   1316 *
   1317 * Unlike smartlist_join_strings(), avoids lossy operations on empty
   1318 * lists.  */
   1319 static char *
   1320 consensus_join_lines(const smartlist_t *inp)
   1321 {
   1322  size_t n = 0;
   1323  SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
   1324  n += 1;
   1325  char *result = tor_malloc(n);
   1326  char *out = result;
   1327  SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
   1328    memcpy(out, cdline->s, cdline->len);
   1329    out += cdline->len;
   1330    *out++ = '\n';
   1331  } SMARTLIST_FOREACH_END(cdline);
   1332  *out++ = '\0';
   1333  tor_assert(out == result+n);
   1334  return result;
   1335 }
   1336 
   1337 /** Given two consensus documents, try to compute a diff between them.  On
   1338 * success, return a newly allocated string containing that diff.  On failure,
   1339 * return NULL. */
   1340 char *
   1341 consensus_diff_generate(const char *cons1, size_t cons1len,
   1342                        const char *cons2, size_t cons2len)
   1343 {
   1344  consensus_digest_t d1, d2;
   1345  smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
   1346  int r1, r2;
   1347  char *result = NULL;
   1348 
   1349  r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
   1350  r2 = consensus_compute_digest(cons2, cons2len, &d2);
   1351  if (BUG(r1 < 0 || r2 < 0))
   1352    return NULL; // LCOV_EXCL_LINE
   1353 
   1354  memarea_t *area = memarea_new();
   1355  lines1 = smartlist_new();
   1356  lines2 = smartlist_new();
   1357  if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
   1358    goto done;
   1359  if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
   1360    goto done;
   1361 
   1362  result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
   1363 
   1364 done:
   1365  if (result_lines) {
   1366    result = consensus_join_lines(result_lines);
   1367    smartlist_free(result_lines);
   1368  }
   1369 
   1370  memarea_drop_all(area);
   1371  smartlist_free(lines1);
   1372  smartlist_free(lines2);
   1373 
   1374  return result;
   1375 }
   1376 
   1377 /** Given a consensus document and a diff, try to apply the diff to the
   1378 * consensus.  On success return a newly allocated string containing the new
   1379 * consensus.  On failure, return NULL. */
   1380 char *
   1381 consensus_diff_apply(const char *consensus,
   1382                     size_t consensus_len,
   1383                     const char *diff,
   1384                     size_t diff_len)
   1385 {
   1386  consensus_digest_t d1;
   1387  smartlist_t *lines1 = NULL, *lines2 = NULL;
   1388  int r1;
   1389  char *result = NULL;
   1390  memarea_t *area = memarea_new();
   1391 
   1392  r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
   1393  if (BUG(r1 < 0))
   1394    goto done;
   1395 
   1396  lines1 = smartlist_new();
   1397  lines2 = smartlist_new();
   1398  if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
   1399    goto done;
   1400  if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
   1401    goto done;
   1402 
   1403  result = consdiff_apply_diff(lines1, lines2, &d1);
   1404 
   1405 done:
   1406  smartlist_free(lines1);
   1407  smartlist_free(lines2);
   1408  memarea_drop_all(area);
   1409 
   1410  return result;
   1411 }
   1412 
   1413 /** Return true iff, based on its header, <b>document</b> is likely
   1414 * to be a consensus diff. */
   1415 int
   1416 looks_like_a_consensus_diff(const char *document, size_t len)
   1417 {
   1418  return (len >= strlen(ns_diff_version) &&
   1419          fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
   1420 }