tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

parsecommon.c (15320B)


      1 /* Copyright (c) 2016-2021, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 /**
      5 * \file parsecommon.c
      6 * \brief Common code to parse and validate various type of descriptors.
      7 **/
      8 
      9 #include "feature/dirparse/parsecommon.h"
     10 #include "lib/log/log.h"
     11 #include "lib/log/util_bug.h"
     12 #include "lib/encoding/binascii.h"
     13 #include "lib/container/smartlist.h"
     14 #include "lib/string/util_string.h"
     15 #include "lib/string/printf.h"
     16 #include "lib/memarea/memarea.h"
     17 #include "lib/crypt_ops/crypto_rsa.h"
     18 #include "lib/ctime/di_ops.h"
     19 
     20 #include <string.h>
     21 
     /* Inclusive range of keyword values that tokenize_string() treats as
      * annotations when it validates a token list. */
     #define MIN_ANNOTATION A_PURPOSE
     #define MAX_ANNOTATION A_UNKNOWN_

     /* Allocation shorthands.  Each expands to a memarea call on a variable named
      * "area", so they can only be used where such a variable is in scope. */
     #define ALLOC_ZERO(sz)  memarea_alloc_zero(area, (sz))
     #define ALLOC(sz)       memarea_alloc(area, (sz))
     #define STRDUP(str)     memarea_strdup(area, (str))
     #define STRNDUP(str, n) memarea_strndup(area, (str), (n))

     /* Abandon the current token with the error text <b>msg</b>: clear the
      * existing token (if any), replace "tok" with a freshly allocated ERR_ token
      * whose error field is a copy of <b>msg</b>, and jump to the label
      * "done_tokenizing".  The enclosing function must therefore provide the
      * variables "tok" and "area" and that label. */
     #define RET_ERR(msg)                                   \
       STMT_BEGIN                                           \
         if (tok)                                           \
           token_clear(tok);                                \
         tok = ALLOC_ZERO(sizeof(directory_token_t));       \
         tok->tp = ERR_;                                    \
         tok->error = STRDUP(msg);                          \
         goto done_tokenizing;                              \
       STMT_END
     38 
     39 /** Free all resources allocated for <b>tok</b> */
     40 void
     41 token_clear(directory_token_t *tok)
     42 {
     43  if (tok->key)
     44    crypto_pk_free(tok->key);
     45 }
     46 
     47 /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add
     48 * them to <b>out</b>.  Parse according to the token rules in <b>table</b>.
     49 * Caller must free tokens in <b>out</b>.  If <b>end</b> is NULL, use the
     50 * entire string.
     51 */
     52 int
     53 tokenize_string(memarea_t *area,
     54                const char *start, const char *end, smartlist_t *out,
     55                const token_rule_t *table, int flags)
     56 {
     57  const char **s;
     58  directory_token_t *tok = NULL;
     59  int counts[NIL_];
     60  int i;
     61  int first_nonannotation;
     62  int prev_len = smartlist_len(out);
     63  tor_assert(area);
     64 
     65  s = &start;
     66  if (!end) {
     67    end = start+strlen(start);
     68  } else {
     69    /* it's only meaningful to check for nuls if we got an end-of-string ptr */
     70    if (memchr(start, '\0', end-start)) {
     71      log_warn(LD_DIR, "parse error: internal NUL character.");
     72      return -1;
     73    }
     74  }
     75  for (i = 0; i < NIL_; ++i)
     76    counts[i] = 0;
     77 
     78  SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]);
     79 
     80  while (*s < end && (!tok || tok->tp != EOF_)) {
     81    tok = get_next_token(area, s, end, table);
     82    if (tok->tp == ERR_) {
     83      log_warn(LD_DIR, "parse error: %s", tok->error);
     84      token_clear(tok);
     85      return -1;
     86    }
     87    ++counts[tok->tp];
     88    smartlist_add(out, tok);
     89    *s = eat_whitespace_eos(*s, end);
     90  }
     91 
     92  if (flags & TS_NOCHECK)
     93    return 0;
     94 
     95  if ((flags & TS_ANNOTATIONS_OK)) {
     96    first_nonannotation = -1;
     97    for (i = 0; i < smartlist_len(out); ++i) {
     98      tok = smartlist_get(out, i);
     99      if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) {
    100        first_nonannotation = i;
    101        break;
    102      }
    103    }
    104    if (first_nonannotation < 0) {
    105      log_warn(LD_DIR, "parse error: item contains only annotations");
    106      return -1;
    107    }
    108    for (i=first_nonannotation;  i < smartlist_len(out); ++i) {
    109      tok = smartlist_get(out, i);
    110      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
    111        log_warn(LD_DIR, "parse error: Annotations mixed with keywords");
    112        return -1;
    113      }
    114    }
    115    if ((flags & TS_NO_NEW_ANNOTATIONS)) {
    116      if (first_nonannotation != prev_len) {
    117        log_warn(LD_DIR, "parse error: Unexpected annotations.");
    118        return -1;
    119      }
    120    }
    121  } else {
    122    for (i=0;  i < smartlist_len(out); ++i) {
    123      tok = smartlist_get(out, i);
    124      if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) {
    125        log_warn(LD_DIR, "parse error: no annotations allowed.");
    126        return -1;
    127      }
    128    }
    129    first_nonannotation = 0;
    130  }
    131  for (i = 0; table[i].t; ++i) {
    132    if (counts[table[i].v] < table[i].min_cnt) {
    133      log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t);
    134      return -1;
    135    }
    136    if (counts[table[i].v] > table[i].max_cnt) {
    137      log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t);
    138      return -1;
    139    }
    140    if (table[i].pos & AT_START) {
    141      if (smartlist_len(out) < 1 ||
    142          (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) {
    143        log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t);
    144        return -1;
    145      }
    146    }
    147    if (table[i].pos & AT_END) {
    148      if (smartlist_len(out) < 1 ||
    149          (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) {
    150        log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t);
    151        return -1;
    152      }
    153    }
    154  }
    155  return 0;
    156 }
    157 
    158 /** Helper: parse space-separated arguments from the string <b>s</b> ending at
    159 * <b>eol</b>, and store them in the args field of <b>tok</b>.  Store the
    160 * number of parsed elements into the n_args field of <b>tok</b>.  Allocate
    161 * all storage in <b>area</b>.  Return the number of arguments parsed, or
    162 * return -1 if there was an insanely high number of arguments. */
    163 static inline int
    164 get_token_arguments(memarea_t *area, directory_token_t *tok,
    165                    const char *s, const char *eol)
    166 {
    167 /** Largest number of arguments we'll accept to any token, ever. */
    168 #define MAX_ARGS 512
    169  char *mem = memarea_strndup(area, s, eol-s);
    170  char *cp = mem;
    171  int j = 0;
    172  char *args[MAX_ARGS];
    173  while (*cp) {
    174    if (j == MAX_ARGS)
    175      return -1;
    176    args[j++] = cp;
    177    cp = (char*)find_whitespace(cp);
    178    if (!cp || !*cp)
    179      break; /* End of the line. */
    180    *cp++ = '\0';
    181    cp = (char*)eat_whitespace(cp);
    182  }
    183  tok->n_args = j;
    184  tok->args = memarea_memdup(area, args, j*sizeof(char*));
    185  return j;
    186 #undef MAX_ARGS
    187 }
    188 
    189 /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys
    190 * the object syntax of <b>o_syn</b>.  Allocate all storage in <b>area</b>.
    191 * Return <b>tok</b> on success, or a new ERR_ token if the token didn't
    192 * conform to the syntax we wanted.
    193 **/
    194 static inline directory_token_t *
    195 token_check_object(memarea_t *area, const char *kwd,
    196                   directory_token_t *tok, obj_syntax o_syn)
    197 {
    198  char ebuf[128];
    199  switch (o_syn) {
    200    case NO_OBJ:
    201      /* No object is allowed for this token. */
    202      if (tok->object_body) {
    203        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
    204        RET_ERR(ebuf);
    205      }
    206      if (tok->key) {
    207        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd);
    208        RET_ERR(ebuf);
    209      }
    210      break;
    211    case NEED_OBJ:
    212      /* There must be a (non-key) object. */
    213      if (!tok->object_body) {
    214        tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd);
    215        RET_ERR(ebuf);
    216      }
    217      break;
    218    case OPT_KEY_1024:
    219      /* If there is anything, it must be a 1024-bit RSA key. */
    220      if (tok->object_body && !tok->key) {
    221        tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd);
    222        RET_ERR(ebuf);
    223      }
    224      if (!tok->key) {
    225        break;
    226      }
    227      FALLTHROUGH;
    228    case NEED_KEY_1024: /* There must be a 1024-bit public key. */
    229      if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) {
    230        tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits",
    231                     kwd, crypto_pk_num_bits(tok->key));
    232        RET_ERR(ebuf);
    233      }
    234      FALLTHROUGH;
    235    case NEED_KEY: /* There must be some kind of key. */
    236      if (!tok->key) {
    237        tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd);
    238        RET_ERR(ebuf);
    239      }
    240 
    241      if (crypto_pk_key_is_private(tok->key)) {
    242        tor_snprintf(ebuf, sizeof(ebuf),
    243                "Private key given for %s, which wants a public key", kwd);
    244        RET_ERR(ebuf);
    245      }
    246      break;
    247    case OBJ_OK:
    248      /* Anything goes with this token. */
    249      break;
    250  }
    251 
    252 done_tokenizing:
    253  return tok;
    254 }
    255 
    /** Return true iff the <b>memlen</b>-byte chunk of memory at <b>mem</b> is
     * exactly as long as the NUL-terminated string <b>token</b> and holds the
     * same bytes. */
    static bool
    mem_eq_token(const void *mem, size_t memlen, const char *token)
    {
      const size_t token_len = strlen(token);

      if (memlen != token_len)
        return false;
      return fast_memeq(mem, token, token_len);
    }
    265 
    266 /** Helper function: read the next token from *s, advance *s to the end of the
    267 * token, and return the parsed token.  Parse *<b>s</b> according to the list
    268 * of tokens in <b>table</b>.
    269 */
    270 directory_token_t *
    271 get_next_token(memarea_t *area,
    272               const char **s, const char *eos, const token_rule_t *table)
    273 {
    274  /** Reject any object at least this big; it is probably an overflow, an
    275   * attack, a bug, or some other nonsense. */
    276 #define MAX_UNPARSED_OBJECT_SIZE (128*1024)
    277  /** Reject any line at least this big; it is probably an overflow, an
    278   * attack, a bug, or some other nonsense. */
    279 #define MAX_LINE_LENGTH (128*1024)
    280 
    281  const char *next, *eol;
    282  size_t obname_len;
    283  int i;
    284  directory_token_t *tok;
    285  obj_syntax o_syn = NO_OBJ;
    286  char ebuf[128];
    287  const char *kwd = "";
    288 
    289  tor_assert(area);
    290  tok = ALLOC_ZERO(sizeof(directory_token_t));
    291  tok->tp = ERR_;
    292 
    293  /* Set *s to first token, eol to end-of-line, next to after first token */
    294  *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */
    295  tor_assert(eos >= *s);
    296  eol = memchr(*s, '\n', eos-*s);
    297  if (!eol)
    298    eol = eos;
    299  if (eol - *s > MAX_LINE_LENGTH) {
    300    RET_ERR("Line far too long");
    301  }
    302 
    303  next = find_whitespace_eos(*s, eol);
    304 
    305  if (mem_eq_token(*s, next-*s, "opt")) {
    306    /* Skip past an "opt" at the start of the line. */
    307    *s = eat_whitespace_eos_no_nl(next, eol);
    308    next = find_whitespace_eos(*s, eol);
    309  } else if (*s == eos) {  /* If no "opt", and end-of-line, line is invalid */
    310    RET_ERR("Unexpected EOF");
    311  }
    312 
    313  /* Search the table for the appropriate entry.  (I tried a binary search
    314   * instead, but it wasn't any faster.) */
    315  for (i = 0; table[i].t ; ++i) {
    316    if (mem_eq_token(*s, next-*s, table[i].t)) {
    317      /* We've found the keyword. */
    318      kwd = table[i].t;
    319      tok->tp = table[i].v;
    320      o_syn = table[i].os;
    321      *s = eat_whitespace_eos_no_nl(next, eol);
    322      /* We go ahead whether there are arguments or not, so that tok->args is
    323       * always set if we want arguments. */
    324      if (table[i].concat_args) {
    325        /* The keyword takes the line as a single argument */
    326        tok->args = ALLOC(sizeof(char*));
    327        tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */
    328        tok->n_args = 1;
    329      } else {
    330        /* This keyword takes multiple arguments. */
    331        if (get_token_arguments(area, tok, *s, eol)<0) {
    332          tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd);
    333          RET_ERR(ebuf);
    334        }
    335        *s = eol;
    336      }
    337      if (tok->n_args < table[i].min_args) {
    338        tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd);
    339        RET_ERR(ebuf);
    340      } else if (tok->n_args > table[i].max_args) {
    341        tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd);
    342        RET_ERR(ebuf);
    343      }
    344      break;
    345    }
    346  }
    347 
    348  if (tok->tp == ERR_) {
    349    /* No keyword matched; call it an "K_opt" or "A_unrecognized" */
    350    if (*s < eol && **s == '@')
    351      tok->tp = A_UNKNOWN_;
    352    else
    353      tok->tp = K_OPT;
    354    tok->args = ALLOC(sizeof(char*));
    355    tok->args[0] = STRNDUP(*s, eol-*s);
    356    tok->n_args = 1;
    357    o_syn = OBJ_OK;
    358  }
    359 
    360  /* Check whether there's an object present */
    361  *s = eat_whitespace_eos(eol, eos);  /* Scan from end of first line */
    362  tor_assert(eos >= *s);
    363  eol = memchr(*s, '\n', eos-*s);
    364  if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */
    365    goto check_object;
    366 
    367  if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */
    368      !mem_eq_token(eol-5, 5, "-----") ||   /* nuls or invalid endings */
    369      (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) {     /* name too long */
    370    RET_ERR("Malformed object: bad begin line");
    371  }
    372  tok->object_type = STRNDUP(*s+11, eol-*s-16);
    373  obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */
    374  *s = eol+1;    /* Set *s to possible start of object data (could be eos) */
    375 
    376  /* Go to the end of the object */
    377  next = tor_memstr(*s, eos-*s, "-----END ");
    378  if (!next) {
    379    RET_ERR("Malformed object: missing object end line");
    380  }
    381  tor_assert(eos >= next);
    382  eol = memchr(next, '\n', eos-next);
    383  if (!eol)  /* end-of-line marker, or eos if there's no '\n' */
    384    eol = eos;
    385  /* Validate the ending tag, which should be 9 + NAME + 5 + eol */
    386  if ((size_t)(eol-next) != 9+obname_len+5 ||
    387      !mem_eq_token(next+9, obname_len, tok->object_type) ||
    388      !mem_eq_token(eol-5, 5, "-----")) {
    389    tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s",
    390             tok->object_type);
    391    ebuf[sizeof(ebuf)-1] = '\0';
    392    RET_ERR(ebuf);
    393  }
    394  if (next - *s > MAX_UNPARSED_OBJECT_SIZE)
    395    RET_ERR("Couldn't parse object: missing footer or object much too big.");
    396 
    397  {
    398    int r;
    399    size_t maxsize = base64_decode_maxsize(next-*s);
    400    tok->object_body = ALLOC(maxsize);
    401    r = base64_decode(tok->object_body, maxsize, *s, next-*s);
    402    if (r<0)
    403      RET_ERR("Malformed object: bad base64-encoded data");
    404    tok->object_size = r;
    405  }
    406 
    407  if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */
    408    if (o_syn != OPT_KEY_1024 && o_syn != NEED_KEY &&
    409        o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) {
    410      RET_ERR("Unexpected public key.");
    411    }
    412    tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size);
    413    if (! tok->key)
    414      RET_ERR("Couldn't parse public key.");
    415  }
    416  *s = eol;
    417 
    418 check_object:
    419  tok = token_check_object(area, kwd, tok, o_syn);
    420 
    421 done_tokenizing:
    422  return tok;
    423 
    424 #undef RET_ERR
    425 #undef ALLOC
    426 #undef ALLOC_ZERO
    427 #undef STRDUP
    428 #undef STRNDUP
    429 }
    430 
    431 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail
    432 * with an assert if no such keyword is found.
    433 */
    434 directory_token_t *
    435 find_by_keyword_(smartlist_t *s, directory_keyword keyword,
    436                 const char *keyword_as_string)
    437 {
    438  directory_token_t *tok = find_opt_by_keyword(s, keyword);
    439  if (PREDICT_UNLIKELY(!tok)) {
    440    log_err(LD_BUG, "Missing %s [%d] in directory object that should have "
    441         "been validated. Internal error.", keyword_as_string, (int)keyword);
    442    tor_assert(tok);
    443  }
    444  return tok;
    445 }
    446 
    447 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return
    448 * NULL if no such keyword is found.
    449 */
    450 directory_token_t *
    451 find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword)
    452 {
    453  SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t);
    454  return NULL;
    455 }
    456 
    457 /** If there are any directory_token_t entries in <b>s</b> whose keyword is
    458 * <b>k</b>, return a newly allocated smartlist_t containing all such entries,
    459 * in the same order in which they occur in <b>s</b>.  Otherwise return
    460 * NULL. */
    461 smartlist_t *
    462 find_all_by_keyword(const smartlist_t *s, directory_keyword k)
    463 {
    464  smartlist_t *out = NULL;
    465  SMARTLIST_FOREACH(s, directory_token_t *, t,
    466                    if (t->tp == k) {
    467                    if (!out)
    468                    out = smartlist_new();
    469                    smartlist_add(out, t);
    470                    });
    471  return out;
    472 }