circuitpadding_machines.c (20339B)
1 /* Copyright (c) 2019 The Tor Project, Inc. */ 2 /* See LICENSE for licensing information */ 3 4 /** 5 * \file circuitpadding_machines.c 6 * \brief Circuit padding state machines 7 * 8 * Introduce circuit padding machines that will be used by Tor circuits, as 9 * specified by proposal 302 "Hiding onion service clients using padding". 10 * 11 * Right now this file introduces two machines that aim to hide the client-side 12 * of onion service circuits against naive classifiers like the ones from the 13 * "Circuit Fingerprinting Attacks: Passive Deanonymization of Tor Hidden 14 * Services" paper from USENIX. By naive classifiers we mean classifiers that 15 * use basic features like "circuit construction circuits" and "incoming and 16 * outgoing cell counts" and "duration of activity". 17 * 18 * In particular, these machines aim to be lightweight and protect against 19 * these basic classifiers. They don't aim to protect against more advanced 20 * attacks that use deep learning or even correlate various circuit 21 * construction events together. Machines that fool such advanced classifiers 22 * are also possible, but they can't be so lightweight and might require more 23 * WTF-PAD features. So for now we opt for the following two machines: 24 * 25 * Client-side introduction circuit hiding machine: 26 * 27 * This machine hides client-side introduction circuits by making their 28 * circuit construction sequence look like normal general circuits that 29 * download directory information. Furthermore, the circuits are kept open 30 * until all the padding has been sent, since intro circuits are usually 31 * very short lived and this act as a distinguisher. For more info see 32 * circpad_machine_client_hide_intro_circuits() and the sec. 33 * 34 * Client-side rendezvous circuit hiding machine: 35 * 36 * This machine hides client-side rendezvous circuits by making their 37 * circuit construction sequence look like normal general circuits. For more 38 * details see circpad_machine_client_hide_rend_circuits() and the spec. 39 * 40 * TODO: These are simple machines that carefully manipulate the cells of the 41 * initial circuit setup procedure to make them look like general 42 * circuits. In the future, more states can be baked into their state machine 43 * to do more advanced obfuscation. 44 **/ 45 46 #define CIRCUITPADDING_MACHINES_PRIVATE 47 48 #include "core/or/or.h" 49 #include "feature/nodelist/networkstatus.h" 50 51 #include "lib/crypt_ops/crypto_rand.h" 52 53 #include "core/or/circuitlist.h" 54 55 #include "core/or/circuitpadding_machines.h" 56 #include "core/or/circuitpadding.h" 57 58 /** Create a client-side padding machine that aims to hide IP circuits. In 59 * particular, it keeps intro circuits alive until a bunch of fake traffic has 60 * been pushed through. 61 */ 62 void 63 circpad_machine_client_hide_intro_circuits(smartlist_t *machines_sl) 64 { 65 circpad_machine_spec_t *client_machine 66 = tor_malloc_zero(sizeof(circpad_machine_spec_t)); 67 68 client_machine->name = "client_ip_circ"; 69 70 client_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED; 71 client_machine->target_hopnum = 2; 72 73 /* This is a client machine */ 74 client_machine->is_origin_side = 1; 75 76 /* We only want to pad introduction circuits, and we want to start padding 77 * only after the INTRODUCE1 cell has been sent, so set the purposes 78 * appropriately. 79 * 80 * In particular we want introduction circuits to blend as much as possible 81 * with general circuits. Most general circuits have the following initial 82 * relay cell sequence (outgoing cells marked in [brackets]): 83 * 84 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED 85 * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue) 86 * 87 * Whereas normal introduction circuits usually look like: 88 * 89 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 90 * -> [INTRO1] -> INTRODUCE_ACK 91 * 92 * This means that up to the sixth cell (first line of each sequence above), 93 * both general and intro circuits have identical cell sequences. After that 94 * we want to mimic the second line sequence of 95 * -> [DATA] -> [DATA] -> DATA -> DATA...(inbound data cells continue) 96 * 97 * We achieve this by starting padding INTRODUCE1 has been sent. With padding 98 * negotiation cells, in the common case of the second line looks like: 99 * -> [INTRO1] -> [PADDING_NEGOTIATE] -> PADDING_NEGOTIATED -> INTRO_ACK 100 * 101 * Then, the middle node will send between INTRO_MACHINE_MINIMUM_PADDING and 102 * INTRO_MACHINE_MAXIMUM_PADDING cells, to match the "...(inbound data cells 103 * continue)" portion of the trace (aka the rest of an HTTPS response body). 104 */ 105 106 /* Start the machine on fresh intro circs. */ 107 client_machine->conditions.apply_purpose_mask = 108 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_INTRODUCE_ACK_WAIT); 109 110 /* If the client purpose changes back to CIRCUIT_PURPOSE_C_INTRODUCING, 111 * or transitions to CIRCUIT_PURPOSE_C_INTRODUCE_ACKED, keep the machine 112 * alive, but do not launch new machines for these purposes. Also 113 * keep the machine around if it is in the CIRCUIT_PADDING purpose 114 * (but do not try to take over other machines in that purpose). */ 115 client_machine->conditions.keep_purpose_mask = 116 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_INTRODUCE_ACKED) | 117 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_CIRCUIT_PADDING); 118 119 /* Keep the circuit alive even after the introduction has been finished, 120 * otherwise the short-term lifetime of the circuit will blow our cover */ 121 client_machine->manage_circ_lifetime = 1; 122 123 /* Set padding machine limits to help guard against excessive padding */ 124 client_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING; 125 client_machine->max_padding_percent = 1; 126 127 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */ 128 circpad_machine_states_init(client_machine, 2); 129 130 /* For the origin-side machine, we transition to OBFUSCATE_CIRC_SETUP after 131 * sending PADDING_NEGOTIATE, and we stay there (without sending any padding) 132 * until we receive a STOP from the other side. */ 133 client_machine->states[CIRCPAD_STATE_START]. 134 next_state[CIRCPAD_EVENT_NONPADDING_SENT] = 135 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 136 137 /* origin-side machine has no event reactions while in 138 * CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP, so no more state transitions here. */ 139 140 /* The client side should never send padding, so it does not need 141 * to specify token removal, or a histogram definition or state lengths. 142 * That is all controlled by the middle node. */ 143 144 /* Register the machine */ 145 client_machine->machine_num = smartlist_len(machines_sl); 146 circpad_register_padding_machine(client_machine, machines_sl); 147 148 log_info(LD_CIRC, 149 "Registered client intro point hiding padding machine (%u)", 150 client_machine->machine_num); 151 } 152 153 /** Create a relay-side padding machine that aims to hide IP circuits. See 154 * comments on the function above for more details on the workings of the 155 * machine. */ 156 void 157 circpad_machine_relay_hide_intro_circuits(smartlist_t *machines_sl) 158 { 159 circpad_machine_spec_t *relay_machine 160 = tor_malloc_zero(sizeof(circpad_machine_spec_t)); 161 162 relay_machine->name = "relay_ip_circ"; 163 164 relay_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED; 165 166 /* This is a relay-side machine */ 167 relay_machine->is_origin_side = 0; 168 169 /* We want to negotiate END from this side after all our padding is done, so 170 * that the origin-side machine goes into END state, and eventually closes 171 * the circuit. */ 172 relay_machine->should_negotiate_end = 1; 173 174 /* Set padding machine limits to help guard against excessive padding */ 175 relay_machine->allowed_padding_count = INTRO_MACHINE_MAXIMUM_PADDING; 176 relay_machine->max_padding_percent = 1; 177 178 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */ 179 circpad_machine_states_init(relay_machine, 2); 180 181 /* For the relay-side machine, we want to transition 182 * START -> OBFUSCATE_CIRC_SETUP upon first non-padding 183 * cell sent (PADDING_NEGOTIATED in this case). */ 184 relay_machine->states[CIRCPAD_STATE_START]. 185 next_state[CIRCPAD_EVENT_NONPADDING_SENT] = 186 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 187 188 /* For the relay-side, we want to transition from OBFUSCATE_CIRC_SETUP to END 189 * state when the length finishes. */ 190 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 191 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END; 192 193 /* Now let's define the OBF -> OBF transitions that maintain our padding 194 * flow: 195 * 196 * For the relay-side machine, we want to keep on sending padding bytes even 197 * when nothing else happens on this circuit. */ 198 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 199 next_state[CIRCPAD_EVENT_PADDING_SENT] = 200 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 201 /* For the relay-side machine, we need this transition so that we re-enter 202 the state, after PADDING_NEGOTIATED is sent. Otherwise, the remove token 203 function will disable the timer, and nothing will restart it since there 204 is no other motion on an intro circuit. */ 205 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 206 next_state[CIRCPAD_EVENT_NONPADDING_SENT] = 207 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 208 209 /* Token removal strategy for OBFUSCATE_CIRC_SETUP state: Don't 210 * remove any tokens. 211 * 212 * We rely on the state length sampling and not token removal, to avoid 213 * the mallocs required to copy the histograms for token removal, 214 * and to avoid monotime calls needed to determine histogram 215 * bins for token removal. */ 216 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 217 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE; 218 219 /* Figure out the length of the OBFUSCATE_CIRC_SETUP state so that it's 220 * randomized. The relay side will send between INTRO_MACHINE_MINIMUM_PADDING 221 * and INTRO_MACHINE_MAXIMUM_PADDING padding cells towards the client. */ 222 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 223 length_dist.type = CIRCPAD_DIST_UNIFORM; 224 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 225 length_dist.param1 = INTRO_MACHINE_MINIMUM_PADDING; 226 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 227 length_dist.param2 = INTRO_MACHINE_MAXIMUM_PADDING; 228 229 /* Configure histogram */ 230 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 231 histogram_len = 2; 232 233 /* For the relay-side machine we want to batch padding instantly to pretend 234 * its an incoming directory download. So set the histogram edges tight: 235 * (1, 10ms, infinity). */ 236 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 237 histogram_edges[0] = 1000; 238 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 239 histogram_edges[1] = 10000; 240 241 /* We put all our tokens in bin 0, which means we want 100% probability 242 * for choosing a inter-packet delay of between 1000 and 10000 microseconds 243 * (1 to 10ms). Since we only have 1 bin, it doesn't matter how many tokens 244 * there are, 1000 out of 1000 is 100% */ 245 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 246 histogram[0] = 1000; 247 248 /* just one bin, so setup the total tokens */ 249 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 250 histogram_total_tokens = 251 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP].histogram[0]; 252 253 /* Register the machine */ 254 relay_machine->machine_num = smartlist_len(machines_sl); 255 circpad_register_padding_machine(relay_machine, machines_sl); 256 257 log_info(LD_CIRC, 258 "Registered relay intro circuit hiding padding machine (%u)", 259 relay_machine->machine_num); 260 } 261 262 /************************** Rendezvous-circuit machine ***********************/ 263 264 /** Create a client-side padding machine that aims to hide rendezvous 265 * circuits.*/ 266 void 267 circpad_machine_client_hide_rend_circuits(smartlist_t *machines_sl) 268 { 269 circpad_machine_spec_t *client_machine 270 = tor_malloc_zero(sizeof(circpad_machine_spec_t)); 271 272 client_machine->name = "client_rp_circ"; 273 274 /* Only pad after the circuit has been built and pad to the middle */ 275 client_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED; 276 client_machine->target_hopnum = 2; 277 278 /* This is a client machine */ 279 client_machine->is_origin_side = 1; 280 281 /* We only want to pad rendezvous circuits, and we want to start padding only 282 * after the rendezvous circuit has been established. 283 * 284 * Following a similar argument as for intro circuits, we are aiming for 285 * padded rendezvous circuits to blend in with the initial cell sequence of 286 * general circuits which usually look like this: 287 * 288 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [BEGIN] -> CONNECTED 289 * -> [DATA] -> [DATA] -> DATA -> DATA...(incoming cells continue) 290 * 291 * Whereas normal rendezvous circuits usually look like: 292 * 293 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST 294 * -> REND2 -> [BEGIN] 295 * 296 * This means that up to the sixth cell (in the first line), both general and 297 * rend circuits have identical cell sequences. 298 * 299 * After that we want to mimic a [DATA] -> [DATA] -> DATA -> DATA sequence. 300 * 301 * With padding negotiation right after the REND_ESTABLISHED, the sequence 302 * becomes: 303 * 304 * [EXTEND2] -> EXTENDED2 -> [EXTEND2] -> EXTENDED2 -> [EST_REND] -> REND_EST 305 * -> [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP... 306 * 307 * After which normal application DATA cells continue on the circuit. 308 * 309 * Hence this way we make rendezvous circuits look like general circuits up 310 * till the end of the circuit setup. */ 311 client_machine->conditions.apply_purpose_mask = 312 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_JOINED)| 313 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_READY)| 314 circpad_circ_purpose_to_mask(CIRCUIT_PURPOSE_C_REND_READY_INTRO_ACKED); 315 316 /* Set padding machine limits to help guard against excessive padding */ 317 client_machine->allowed_padding_count = 1; 318 client_machine->max_padding_percent = 1; 319 320 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */ 321 circpad_machine_states_init(client_machine, 2); 322 323 /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first 324 * non-padding cell (which is PADDING_NEGOTIATE) */ 325 client_machine->states[CIRCPAD_STATE_START]. 326 next_state[CIRCPAD_EVENT_NONPADDING_SENT] = 327 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 328 329 /* OBFUSCATE_CIRC_SETUP -> END transition when we send our first 330 * padding packet and/or hit the state length (the state length is 1). */ 331 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 332 next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_END; 333 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 334 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END; 335 336 /* Don't use a token removal strategy since we don't want to use monotime 337 * functions and we want to avoid mallocing histogram copies. We want 338 * this machine to be light. */ 339 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 340 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE; 341 342 /* Instead, to control the volume of padding (we just want to send a single 343 * padding cell) we will use a static state length. We just want one token, 344 * since we want to make the following pattern: 345 * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */ 346 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 347 length_dist.type = CIRCPAD_DIST_UNIFORM; 348 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 349 length_dist.param1 = 1; 350 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 351 length_dist.param2 = 2; // rand(1,2) is always 1 352 353 /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so 354 * that we send our outgoing [DROP] before the PADDING_NEGOTIATED comes 355 * back from the relay side. */ 356 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 357 histogram_len = 2; 358 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 359 histogram_edges[0] = 0; 360 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 361 histogram_edges[1] = 1000; 362 363 /* We want a 100% probability of choosing an inter-packet delay of 364 * between 0 and 1ms. Since we don't use token removal, 365 * the number of tokens does not matter. (And also, state_length 366 * governs how many packets we send). */ 367 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 368 histogram[0] = 1; 369 client_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 370 histogram_total_tokens = 1; 371 372 /* Register the machine */ 373 client_machine->machine_num = smartlist_len(machines_sl); 374 circpad_register_padding_machine(client_machine, machines_sl); 375 376 log_info(LD_CIRC, 377 "Registered client rendezvous circuit hiding padding machine (%u)", 378 client_machine->machine_num); 379 } 380 381 /** Create a relay-side padding machine that aims to hide IP circuits. 382 * 383 * This is meant to follow the client-side machine. 384 */ 385 void 386 circpad_machine_relay_hide_rend_circuits(smartlist_t *machines_sl) 387 { 388 circpad_machine_spec_t *relay_machine 389 = tor_malloc_zero(sizeof(circpad_machine_spec_t)); 390 391 relay_machine->name = "relay_rp_circ"; 392 393 /* Only pad after the circuit has been built and pad to the middle */ 394 relay_machine->conditions.min_hops = 2; 395 relay_machine->conditions.apply_state_mask = CIRCPAD_CIRC_OPENED; 396 397 /* This is a relay-side machine */ 398 relay_machine->is_origin_side = 0; 399 400 /* Set padding machine limits to help guard against excessive padding */ 401 relay_machine->allowed_padding_count = 1; 402 relay_machine->max_padding_percent = 1; 403 404 /* Two states: START, OBFUSCATE_CIRC_SETUP (and END) */ 405 circpad_machine_states_init(relay_machine, 2); 406 407 /* START -> OBFUSCATE_CIRC_SETUP transition upon sending the first 408 * non-padding cell (which is PADDING_NEGOTIATED) */ 409 relay_machine->states[CIRCPAD_STATE_START]. 410 next_state[CIRCPAD_EVENT_NONPADDING_SENT] = 411 CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP; 412 413 /* OBFUSCATE_CIRC_SETUP -> END transition when we send our first 414 * padding packet and/or hit the state length (the state length is 1). */ 415 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 416 next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_END; 417 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 418 next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END; 419 420 /* Don't use a token removal strategy since we don't want to use monotime 421 * functions and we want to avoid mallocing histogram copies. We want 422 * this machine to be light. */ 423 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 424 token_removal = CIRCPAD_TOKEN_REMOVAL_NONE; 425 426 /* Instead, to control the volume of padding (we just want to send a single 427 * padding cell) we will use a static state length. We just want one token, 428 * since we want to make the following pattern: 429 * [PADDING_NEGOTIATE] -> [DROP] -> PADDING_NEGOTIATED -> DROP */ 430 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 431 length_dist.type = CIRCPAD_DIST_UNIFORM; 432 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 433 length_dist.param1 = 1; 434 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 435 length_dist.param2 = 2; // rand(1,2) is always 1 436 437 /* Histogram is: (0 msecs, 1 msec, infinity). We want this to be fast so 438 * that the outgoing DROP cell is sent immediately after the 439 * PADDING_NEGOTIATED. */ 440 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 441 histogram_len = 2; 442 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 443 histogram_edges[0] = 0; 444 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 445 histogram_edges[1] = 1000; 446 447 /* We want a 100% probability of choosing an inter-packet delay of 448 * between 0 and 1ms. Since we don't use token removal, 449 * the number of tokens does not matter. (And also, state_length 450 * governs how many packets we send). */ 451 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 452 histogram[0] = 1; 453 relay_machine->states[CIRCPAD_STATE_OBFUSCATE_CIRC_SETUP]. 454 histogram_total_tokens = 1; 455 456 /* Register the machine */ 457 relay_machine->machine_num = smartlist_len(machines_sl); 458 circpad_register_padding_machine(relay_machine, machines_sl); 459 460 log_info(LD_CIRC, 461 "Registered relay rendezvous circuit hiding padding machine (%u)", 462 relay_machine->machine_num); 463 }