parsecommon.c (15320B)
1 /* Copyright (c) 2016-2021, The Tor Project, Inc. */ 2 /* See LICENSE for licensing information */ 3 4 /** 5 * \file parsecommon.c 6 * \brief Common code to parse and validate various type of descriptors. 7 **/ 8 9 #include "feature/dirparse/parsecommon.h" 10 #include "lib/log/log.h" 11 #include "lib/log/util_bug.h" 12 #include "lib/encoding/binascii.h" 13 #include "lib/container/smartlist.h" 14 #include "lib/string/util_string.h" 15 #include "lib/string/printf.h" 16 #include "lib/memarea/memarea.h" 17 #include "lib/crypt_ops/crypto_rsa.h" 18 #include "lib/ctime/di_ops.h" 19 20 #include <string.h> 21 22 #define MIN_ANNOTATION A_PURPOSE 23 #define MAX_ANNOTATION A_UNKNOWN_ 24 25 #define ALLOC_ZERO(sz) memarea_alloc_zero(area,sz) 26 #define ALLOC(sz) memarea_alloc(area,sz) 27 #define STRDUP(str) memarea_strdup(area,str) 28 #define STRNDUP(str,n) memarea_strndup(area,(str),(n)) 29 30 #define RET_ERR(msg) \ 31 STMT_BEGIN \ 32 if (tok) token_clear(tok); \ 33 tok = ALLOC_ZERO(sizeof(directory_token_t)); \ 34 tok->tp = ERR_; \ 35 tok->error = STRDUP(msg); \ 36 goto done_tokenizing; \ 37 STMT_END 38 39 /** Free all resources allocated for <b>tok</b> */ 40 void 41 token_clear(directory_token_t *tok) 42 { 43 if (tok->key) 44 crypto_pk_free(tok->key); 45 } 46 47 /** Read all tokens from a string between <b>start</b> and <b>end</b>, and add 48 * them to <b>out</b>. Parse according to the token rules in <b>table</b>. 49 * Caller must free tokens in <b>out</b>. If <b>end</b> is NULL, use the 50 * entire string. 51 */ 52 int 53 tokenize_string(memarea_t *area, 54 const char *start, const char *end, smartlist_t *out, 55 const token_rule_t *table, int flags) 56 { 57 const char **s; 58 directory_token_t *tok = NULL; 59 int counts[NIL_]; 60 int i; 61 int first_nonannotation; 62 int prev_len = smartlist_len(out); 63 tor_assert(area); 64 65 s = &start; 66 if (!end) { 67 end = start+strlen(start); 68 } else { 69 /* it's only meaningful to check for nuls if we got an end-of-string ptr */ 70 if (memchr(start, '\0', end-start)) { 71 log_warn(LD_DIR, "parse error: internal NUL character."); 72 return -1; 73 } 74 } 75 for (i = 0; i < NIL_; ++i) 76 counts[i] = 0; 77 78 SMARTLIST_FOREACH(out, const directory_token_t *, t, ++counts[t->tp]); 79 80 while (*s < end && (!tok || tok->tp != EOF_)) { 81 tok = get_next_token(area, s, end, table); 82 if (tok->tp == ERR_) { 83 log_warn(LD_DIR, "parse error: %s", tok->error); 84 token_clear(tok); 85 return -1; 86 } 87 ++counts[tok->tp]; 88 smartlist_add(out, tok); 89 *s = eat_whitespace_eos(*s, end); 90 } 91 92 if (flags & TS_NOCHECK) 93 return 0; 94 95 if ((flags & TS_ANNOTATIONS_OK)) { 96 first_nonannotation = -1; 97 for (i = 0; i < smartlist_len(out); ++i) { 98 tok = smartlist_get(out, i); 99 if (tok->tp < MIN_ANNOTATION || tok->tp > MAX_ANNOTATION) { 100 first_nonannotation = i; 101 break; 102 } 103 } 104 if (first_nonannotation < 0) { 105 log_warn(LD_DIR, "parse error: item contains only annotations"); 106 return -1; 107 } 108 for (i=first_nonannotation; i < smartlist_len(out); ++i) { 109 tok = smartlist_get(out, i); 110 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { 111 log_warn(LD_DIR, "parse error: Annotations mixed with keywords"); 112 return -1; 113 } 114 } 115 if ((flags & TS_NO_NEW_ANNOTATIONS)) { 116 if (first_nonannotation != prev_len) { 117 log_warn(LD_DIR, "parse error: Unexpected annotations."); 118 return -1; 119 } 120 } 121 } else { 122 for (i=0; i < smartlist_len(out); ++i) { 123 tok = smartlist_get(out, i); 124 if (tok->tp >= MIN_ANNOTATION && tok->tp <= MAX_ANNOTATION) { 125 log_warn(LD_DIR, "parse error: no annotations allowed."); 126 return -1; 127 } 128 } 129 first_nonannotation = 0; 130 } 131 for (i = 0; table[i].t; ++i) { 132 if (counts[table[i].v] < table[i].min_cnt) { 133 log_warn(LD_DIR, "Parse error: missing %s element.", table[i].t); 134 return -1; 135 } 136 if (counts[table[i].v] > table[i].max_cnt) { 137 log_warn(LD_DIR, "Parse error: too many %s elements.", table[i].t); 138 return -1; 139 } 140 if (table[i].pos & AT_START) { 141 if (smartlist_len(out) < 1 || 142 (tok = smartlist_get(out, first_nonannotation))->tp != table[i].v) { 143 log_warn(LD_DIR, "Parse error: first item is not %s.", table[i].t); 144 return -1; 145 } 146 } 147 if (table[i].pos & AT_END) { 148 if (smartlist_len(out) < 1 || 149 (tok = smartlist_get(out, smartlist_len(out)-1))->tp != table[i].v) { 150 log_warn(LD_DIR, "Parse error: last item is not %s.", table[i].t); 151 return -1; 152 } 153 } 154 } 155 return 0; 156 } 157 158 /** Helper: parse space-separated arguments from the string <b>s</b> ending at 159 * <b>eol</b>, and store them in the args field of <b>tok</b>. Store the 160 * number of parsed elements into the n_args field of <b>tok</b>. Allocate 161 * all storage in <b>area</b>. Return the number of arguments parsed, or 162 * return -1 if there was an insanely high number of arguments. */ 163 static inline int 164 get_token_arguments(memarea_t *area, directory_token_t *tok, 165 const char *s, const char *eol) 166 { 167 /** Largest number of arguments we'll accept to any token, ever. */ 168 #define MAX_ARGS 512 169 char *mem = memarea_strndup(area, s, eol-s); 170 char *cp = mem; 171 int j = 0; 172 char *args[MAX_ARGS]; 173 while (*cp) { 174 if (j == MAX_ARGS) 175 return -1; 176 args[j++] = cp; 177 cp = (char*)find_whitespace(cp); 178 if (!cp || !*cp) 179 break; /* End of the line. */ 180 *cp++ = '\0'; 181 cp = (char*)eat_whitespace(cp); 182 } 183 tok->n_args = j; 184 tok->args = memarea_memdup(area, args, j*sizeof(char*)); 185 return j; 186 #undef MAX_ARGS 187 } 188 189 /** Helper: make sure that the token <b>tok</b> with keyword <b>kwd</b> obeys 190 * the object syntax of <b>o_syn</b>. Allocate all storage in <b>area</b>. 191 * Return <b>tok</b> on success, or a new ERR_ token if the token didn't 192 * conform to the syntax we wanted. 193 **/ 194 static inline directory_token_t * 195 token_check_object(memarea_t *area, const char *kwd, 196 directory_token_t *tok, obj_syntax o_syn) 197 { 198 char ebuf[128]; 199 switch (o_syn) { 200 case NO_OBJ: 201 /* No object is allowed for this token. */ 202 if (tok->object_body) { 203 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd); 204 RET_ERR(ebuf); 205 } 206 if (tok->key) { 207 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected public key for %s", kwd); 208 RET_ERR(ebuf); 209 } 210 break; 211 case NEED_OBJ: 212 /* There must be a (non-key) object. */ 213 if (!tok->object_body) { 214 tor_snprintf(ebuf, sizeof(ebuf), "Missing object for %s", kwd); 215 RET_ERR(ebuf); 216 } 217 break; 218 case OPT_KEY_1024: 219 /* If there is anything, it must be a 1024-bit RSA key. */ 220 if (tok->object_body && !tok->key) { 221 tor_snprintf(ebuf, sizeof(ebuf), "Unexpected object for %s", kwd); 222 RET_ERR(ebuf); 223 } 224 if (!tok->key) { 225 break; 226 } 227 FALLTHROUGH; 228 case NEED_KEY_1024: /* There must be a 1024-bit public key. */ 229 if (tok->key && crypto_pk_num_bits(tok->key) != PK_BYTES*8) { 230 tor_snprintf(ebuf, sizeof(ebuf), "Wrong size on key for %s: %d bits", 231 kwd, crypto_pk_num_bits(tok->key)); 232 RET_ERR(ebuf); 233 } 234 FALLTHROUGH; 235 case NEED_KEY: /* There must be some kind of key. */ 236 if (!tok->key) { 237 tor_snprintf(ebuf, sizeof(ebuf), "Missing public key for %s", kwd); 238 RET_ERR(ebuf); 239 } 240 241 if (crypto_pk_key_is_private(tok->key)) { 242 tor_snprintf(ebuf, sizeof(ebuf), 243 "Private key given for %s, which wants a public key", kwd); 244 RET_ERR(ebuf); 245 } 246 break; 247 case OBJ_OK: 248 /* Anything goes with this token. */ 249 break; 250 } 251 252 done_tokenizing: 253 return tok; 254 } 255 256 /** Return true iff the <b>memlen</b>-byte chunk of memory at 257 * <b>memlen</b> is the same length as <b>token</b>, and their 258 * contents are equal. */ 259 static bool 260 mem_eq_token(const void *mem, size_t memlen, const char *token) 261 { 262 size_t len = strlen(token); 263 return memlen == len && fast_memeq(mem, token, len); 264 } 265 266 /** Helper function: read the next token from *s, advance *s to the end of the 267 * token, and return the parsed token. Parse *<b>s</b> according to the list 268 * of tokens in <b>table</b>. 269 */ 270 directory_token_t * 271 get_next_token(memarea_t *area, 272 const char **s, const char *eos, const token_rule_t *table) 273 { 274 /** Reject any object at least this big; it is probably an overflow, an 275 * attack, a bug, or some other nonsense. */ 276 #define MAX_UNPARSED_OBJECT_SIZE (128*1024) 277 /** Reject any line at least this big; it is probably an overflow, an 278 * attack, a bug, or some other nonsense. */ 279 #define MAX_LINE_LENGTH (128*1024) 280 281 const char *next, *eol; 282 size_t obname_len; 283 int i; 284 directory_token_t *tok; 285 obj_syntax o_syn = NO_OBJ; 286 char ebuf[128]; 287 const char *kwd = ""; 288 289 tor_assert(area); 290 tok = ALLOC_ZERO(sizeof(directory_token_t)); 291 tok->tp = ERR_; 292 293 /* Set *s to first token, eol to end-of-line, next to after first token */ 294 *s = eat_whitespace_eos(*s, eos); /* eat multi-line whitespace */ 295 tor_assert(eos >= *s); 296 eol = memchr(*s, '\n', eos-*s); 297 if (!eol) 298 eol = eos; 299 if (eol - *s > MAX_LINE_LENGTH) { 300 RET_ERR("Line far too long"); 301 } 302 303 next = find_whitespace_eos(*s, eol); 304 305 if (mem_eq_token(*s, next-*s, "opt")) { 306 /* Skip past an "opt" at the start of the line. */ 307 *s = eat_whitespace_eos_no_nl(next, eol); 308 next = find_whitespace_eos(*s, eol); 309 } else if (*s == eos) { /* If no "opt", and end-of-line, line is invalid */ 310 RET_ERR("Unexpected EOF"); 311 } 312 313 /* Search the table for the appropriate entry. (I tried a binary search 314 * instead, but it wasn't any faster.) */ 315 for (i = 0; table[i].t ; ++i) { 316 if (mem_eq_token(*s, next-*s, table[i].t)) { 317 /* We've found the keyword. */ 318 kwd = table[i].t; 319 tok->tp = table[i].v; 320 o_syn = table[i].os; 321 *s = eat_whitespace_eos_no_nl(next, eol); 322 /* We go ahead whether there are arguments or not, so that tok->args is 323 * always set if we want arguments. */ 324 if (table[i].concat_args) { 325 /* The keyword takes the line as a single argument */ 326 tok->args = ALLOC(sizeof(char*)); 327 tok->args[0] = STRNDUP(*s,eol-*s); /* Grab everything on line */ 328 tok->n_args = 1; 329 } else { 330 /* This keyword takes multiple arguments. */ 331 if (get_token_arguments(area, tok, *s, eol)<0) { 332 tor_snprintf(ebuf, sizeof(ebuf),"Far too many arguments to %s", kwd); 333 RET_ERR(ebuf); 334 } 335 *s = eol; 336 } 337 if (tok->n_args < table[i].min_args) { 338 tor_snprintf(ebuf, sizeof(ebuf), "Too few arguments to %s", kwd); 339 RET_ERR(ebuf); 340 } else if (tok->n_args > table[i].max_args) { 341 tor_snprintf(ebuf, sizeof(ebuf), "Too many arguments to %s", kwd); 342 RET_ERR(ebuf); 343 } 344 break; 345 } 346 } 347 348 if (tok->tp == ERR_) { 349 /* No keyword matched; call it an "K_opt" or "A_unrecognized" */ 350 if (*s < eol && **s == '@') 351 tok->tp = A_UNKNOWN_; 352 else 353 tok->tp = K_OPT; 354 tok->args = ALLOC(sizeof(char*)); 355 tok->args[0] = STRNDUP(*s, eol-*s); 356 tok->n_args = 1; 357 o_syn = OBJ_OK; 358 } 359 360 /* Check whether there's an object present */ 361 *s = eat_whitespace_eos(eol, eos); /* Scan from end of first line */ 362 tor_assert(eos >= *s); 363 eol = memchr(*s, '\n', eos-*s); 364 if (!eol || eol-*s<11 || strcmpstart(*s, "-----BEGIN ")) /* No object. */ 365 goto check_object; 366 367 if (eol - *s <= 16 || memchr(*s+11,'\0',eol-*s-16) || /* no short lines, */ 368 !mem_eq_token(eol-5, 5, "-----") || /* nuls or invalid endings */ 369 (eol-*s) > MAX_UNPARSED_OBJECT_SIZE) { /* name too long */ 370 RET_ERR("Malformed object: bad begin line"); 371 } 372 tok->object_type = STRNDUP(*s+11, eol-*s-16); 373 obname_len = eol-*s-16; /* store objname length here to avoid a strlen() */ 374 *s = eol+1; /* Set *s to possible start of object data (could be eos) */ 375 376 /* Go to the end of the object */ 377 next = tor_memstr(*s, eos-*s, "-----END "); 378 if (!next) { 379 RET_ERR("Malformed object: missing object end line"); 380 } 381 tor_assert(eos >= next); 382 eol = memchr(next, '\n', eos-next); 383 if (!eol) /* end-of-line marker, or eos if there's no '\n' */ 384 eol = eos; 385 /* Validate the ending tag, which should be 9 + NAME + 5 + eol */ 386 if ((size_t)(eol-next) != 9+obname_len+5 || 387 !mem_eq_token(next+9, obname_len, tok->object_type) || 388 !mem_eq_token(eol-5, 5, "-----")) { 389 tor_snprintf(ebuf, sizeof(ebuf), "Malformed object: mismatched end tag %s", 390 tok->object_type); 391 ebuf[sizeof(ebuf)-1] = '\0'; 392 RET_ERR(ebuf); 393 } 394 if (next - *s > MAX_UNPARSED_OBJECT_SIZE) 395 RET_ERR("Couldn't parse object: missing footer or object much too big."); 396 397 { 398 int r; 399 size_t maxsize = base64_decode_maxsize(next-*s); 400 tok->object_body = ALLOC(maxsize); 401 r = base64_decode(tok->object_body, maxsize, *s, next-*s); 402 if (r<0) 403 RET_ERR("Malformed object: bad base64-encoded data"); 404 tok->object_size = r; 405 } 406 407 if (!strcmp(tok->object_type, "RSA PUBLIC KEY")) { /* If it's a public key */ 408 if (o_syn != OPT_KEY_1024 && o_syn != NEED_KEY && 409 o_syn != NEED_KEY_1024 && o_syn != OBJ_OK) { 410 RET_ERR("Unexpected public key."); 411 } 412 tok->key = crypto_pk_asn1_decode(tok->object_body, tok->object_size); 413 if (! tok->key) 414 RET_ERR("Couldn't parse public key."); 415 } 416 *s = eol; 417 418 check_object: 419 tok = token_check_object(area, kwd, tok, o_syn); 420 421 done_tokenizing: 422 return tok; 423 424 #undef RET_ERR 425 #undef ALLOC 426 #undef ALLOC_ZERO 427 #undef STRDUP 428 #undef STRNDUP 429 } 430 431 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; fail 432 * with an assert if no such keyword is found. 433 */ 434 directory_token_t * 435 find_by_keyword_(smartlist_t *s, directory_keyword keyword, 436 const char *keyword_as_string) 437 { 438 directory_token_t *tok = find_opt_by_keyword(s, keyword); 439 if (PREDICT_UNLIKELY(!tok)) { 440 log_err(LD_BUG, "Missing %s [%d] in directory object that should have " 441 "been validated. Internal error.", keyword_as_string, (int)keyword); 442 tor_assert(tok); 443 } 444 return tok; 445 } 446 447 /** Find the first token in <b>s</b> whose keyword is <b>keyword</b>; return 448 * NULL if no such keyword is found. 449 */ 450 directory_token_t * 451 find_opt_by_keyword(const smartlist_t *s, directory_keyword keyword) 452 { 453 SMARTLIST_FOREACH(s, directory_token_t *, t, if (t->tp == keyword) return t); 454 return NULL; 455 } 456 457 /** If there are any directory_token_t entries in <b>s</b> whose keyword is 458 * <b>k</b>, return a newly allocated smartlist_t containing all such entries, 459 * in the same order in which they occur in <b>s</b>. Otherwise return 460 * NULL. */ 461 smartlist_t * 462 find_all_by_keyword(const smartlist_t *s, directory_keyword k) 463 { 464 smartlist_t *out = NULL; 465 SMARTLIST_FOREACH(s, directory_token_t *, t, 466 if (t->tp == k) { 467 if (!out) 468 out = smartlist_new(); 469 smartlist_add(out, t); 470 }); 471 return out; 472 }