/* scheduler.c (26216B) */
1 /* Copyright (c) 2013-2021, The Tor Project, Inc. */ 2 /* See LICENSE for licensing information */ 3 4 #include "core/or/or.h" 5 #include "app/config/config.h" 6 7 #include "lib/evloop/compat_libevent.h" 8 #define SCHEDULER_PRIVATE 9 #define SCHEDULER_KIST_PRIVATE 10 #include "core/or/scheduler.h" 11 #include "core/mainloop/mainloop.h" 12 #include "lib/buf/buffers.h" 13 #define CHANNEL_OBJECT_PRIVATE 14 #include "core/or/channeltls.h" 15 16 #include "core/or/or_connection_st.h" 17 18 /** 19 * \file scheduler.c 20 * \brief Channel scheduling system: decides which channels should send and 21 * receive when. 22 * 23 * This module is the global/common parts of the scheduling system. This system 24 * is what decides what channels get to send cells on their circuits and when. 25 * 26 * Terms: 27 * - "Scheduling system": the collection of scheduler*.{h,c} files and their 28 * aggregate behavior. 29 * - "Scheduler implementation": a scheduler_t. The scheduling system has one 30 * active scheduling implementation at a time. 31 * 32 * In this file you will find state that any scheduler implementation can have 33 * access to as well as the functions the rest of Tor uses to interact with the 34 * scheduling system. 35 * 36 * The earliest versions of Tor approximated a kind of round-robin system 37 * among active connections, but only approximated it. It would only consider 38 * one connection (roughly equal to a channel in today's terms) at a time, and 39 * thus could only prioritize circuits against others on the same connection. 40 * 41 * Then in response to the KIST paper[0], Tor implemented a global 42 * circuit scheduler. It was supposed to prioritize circuits across many 43 * channels, but wasn't effective. It is preserved in scheduler_vanilla.c. 44 * 45 * [0]: https://www.robgjansen.com/publications/kist-sec2014.pdf 46 * 47 * Then we actually got around to implementing KIST for real. We decided to 48 * modularize the scheduler so new ones can be implemented. 
You can find KIST 49 * in scheduler_kist.c. 50 * 51 * Channels have one of four scheduling states based on whether or not they 52 * have cells to send and whether or not they are able to send. 53 * 54 * <ol> 55 * <li> 56 * Not open for writes, no cells to send. 57 * <ul><li> Not much to do here, and the channel will have scheduler_state 58 * == SCHED_CHAN_IDLE 59 * <li> Transitions from: 60 * <ul> 61 * <li>Open for writes/has cells by simultaneously draining all circuit 62 * queues and filling the output buffer. 63 * </ul> 64 * <li> Transitions to: 65 * <ul> 66 * <li> Not open for writes/has cells by arrival of cells on an attached 67 * circuit (this would be driven from append_cell_to_circuit_queue()) 68 * <li> Open for writes/no cells by a channel type specific path; 69 * driven from connection_or_flushed_some() for channel_tls_t. 70 * </ul> 71 * </ul> 72 * 73 * <li> Open for writes, no cells to send 74 * <ul> 75 * <li>Not much here either; this will be the state an idle but open 76 * channel can be expected to settle in. It will have scheduler_state 77 * == SCHED_CHAN_WAITING_FOR_CELLS 78 * <li> Transitions from: 79 * <ul> 80 * <li>Not open for writes/no cells by flushing some of the output 81 * buffer. 82 * <li>Open for writes/has cells by the scheduler moving cells from 83 * circuit queues to channel output queue, but not having enough 84 * to fill the output queue. 85 * </ul> 86 * <li> Transitions to: 87 * <ul> 88 * <li>Open for writes/has cells by arrival of new cells on an attached 89 * circuit, in append_cell_to_circuit_queue() 90 * </ul> 91 * </ul> 92 * 93 * <li>Not open for writes, cells to send 94 * <ul> 95 * <li>This is the state of a busy circuit limited by output bandwidth; 96 * cells have piled up in the circuit queues waiting to be relayed. 97 * The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE. 
98 * <li> Transitions from: 99 * <ul> 100 * <li>Not open for writes/no cells by arrival of cells on an attached 101 * circuit 102 * <li>Open for writes/has cells by filling an output buffer without 103 * draining all cells from attached circuits 104 * </ul> 105 * <li> Transitions to: 106 * <ul> 107 * <li>Opens for writes/has cells by draining some of the output buffer 108 * via the connection_or_flushed_some() path (for channel_tls_t). 109 * </ul> 110 * </ul> 111 * 112 * <li>Open for writes, cells to send 113 * <ul> 114 * <li>This connection is ready to relay some cells and waiting for 115 * the scheduler to choose it. The channel will have scheduler_state == 116 * SCHED_CHAN_PENDING. 117 * <li>Transitions from: 118 * <ul> 119 * <li>Not open for writes/has cells by the connection_or_flushed_some() 120 * path 121 * <li>Open for writes/no cells by the append_cell_to_circuit_queue() 122 * path 123 * </ul> 124 * <li> Transitions to: 125 * <ul> 126 * <li>Not open for writes/no cells by draining all circuit queues and 127 * simultaneously filling the output buffer. 128 * <li>Not open for writes/has cells by writing enough cells to fill the 129 * output buffer 130 * <li>Open for writes/no cells by draining all attached circuit queues 131 * without also filling the output buffer 132 * </ul> 133 * </ul> 134 * </ol> 135 * 136 * Other event-driven parts of the code move channels between these scheduling 137 * states by calling scheduler functions. The scheduling system builds up a 138 * list of channels in the SCHED_CHAN_PENDING state that the scheduler 139 * implementation should then use when it runs. Scheduling implementations need 140 * to properly update channel states during their scheduler_t->run() function 141 * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING 142 * to any other state. 
143 * 144 * The remainder of this file is a small amount of state that any scheduler 145 * implementation should have access to, and the functions the rest of Tor uses 146 * to interact with the scheduling system. 147 */ 148 149 /***************************************************************************** 150 * Scheduling system state 151 * 152 * State that can be accessed from any scheduler implementation (but not 153 * outside the scheduling system) 154 *****************************************************************************/ 155 156 /** DOCDOC */ 157 STATIC const scheduler_t *the_scheduler; 158 159 /** 160 * We keep a list of channels that are pending - i.e, have cells to write 161 * and can accept them to send. The enum scheduler_state in channel_t 162 * is reserved for our use. 163 * 164 * Priority queue of channels that can write and have cells (pending work) 165 */ 166 STATIC smartlist_t *channels_pending = NULL; 167 168 /** 169 * This event runs the scheduler from its callback, and is manually 170 * activated whenever a channel enters open for writes/cells to send. 171 */ 172 STATIC struct mainloop_event_t *run_sched_ev = NULL; 173 174 static int have_logged_kist_suddenly_disabled = 0; 175 176 /***************************************************************************** 177 * Scheduling system static function definitions 178 * 179 * Functions that can only be accessed from this file. 180 *****************************************************************************/ 181 182 /** Return a human readable string for the given scheduler type. 
*/ 183 static const char * 184 get_scheduler_type_string(scheduler_types_t type) 185 { 186 switch (type) { 187 case SCHEDULER_VANILLA: 188 return "Vanilla"; 189 case SCHEDULER_KIST: 190 return "KIST"; 191 case SCHEDULER_KIST_LITE: 192 return "KISTLite"; 193 case SCHEDULER_NONE: 194 FALLTHROUGH; 195 default: 196 tor_assert_unreached(); 197 return "(N/A)"; 198 } 199 } 200 201 /** 202 * Scheduler event callback; this should get triggered once per event loop 203 * if any scheduling work was created during the event loop. 204 */ 205 static void 206 scheduler_evt_callback(mainloop_event_t *event, void *arg) 207 { 208 (void) event; 209 (void) arg; 210 211 log_debug(LD_SCHED, "Scheduler event callback called"); 212 213 /* Run the scheduler. This is a mandatory function. */ 214 215 /* We might as well assert on this. If this function doesn't exist, no cells 216 * are getting scheduled. Things are very broken. scheduler_t says the run() 217 * function is mandatory. */ 218 tor_assert(the_scheduler->run); 219 the_scheduler->run(); 220 221 /* Schedule itself back in if it has more work. */ 222 223 /* Again, might as well assert on this mandatory scheduler_t function. If it 224 * doesn't exist, there's no way to tell libevent to run the scheduler again 225 * in the future. */ 226 tor_assert(the_scheduler->schedule); 227 the_scheduler->schedule(); 228 } 229 230 /** Using the global options, select the scheduler we should be using. */ 231 static void 232 select_scheduler(void) 233 { 234 scheduler_t *new_scheduler = NULL; 235 236 #ifdef TOR_UNIT_TESTS 237 /* This is hella annoying to set in the options for every test that passes 238 * through the scheduler and there are many so if we don't explicitly have 239 * a list of types set, just put the vanilla one. 
*/ 240 if (get_options()->SchedulerTypes_ == NULL) { 241 the_scheduler = get_vanilla_scheduler(); 242 return; 243 } 244 #endif /* defined(TOR_UNIT_TESTS) */ 245 246 /* This list is ordered that is first entry has the first priority. Thus, as 247 * soon as we find a scheduler type that we can use, we use it and stop. */ 248 SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) { 249 switch (*type) { 250 case SCHEDULER_VANILLA: 251 new_scheduler = get_vanilla_scheduler(); 252 goto end; 253 case SCHEDULER_KIST: 254 if (!scheduler_can_use_kist()) { 255 #ifdef HAVE_KIST_SUPPORT 256 if (!have_logged_kist_suddenly_disabled) { 257 /* We should only log this once in most cases. If it was the kernel 258 * losing support for kist that caused scheduler_can_use_kist() to 259 * return false, then this flag makes sure we only log this message 260 * once. If it was the consensus that switched from "yes use kist" 261 * to "no don't use kist", then we still set the flag so we log 262 * once, but we unset the flag elsewhere if we ever can_use_kist() 263 * again. 264 */ 265 have_logged_kist_suddenly_disabled = 1; 266 log_notice(LD_SCHED, "Scheduler type KIST has been disabled by " 267 "the consensus or no kernel support."); 268 } 269 #else /* !defined(HAVE_KIST_SUPPORT) */ 270 log_info(LD_SCHED, "Scheduler type KIST not built in"); 271 #endif /* defined(HAVE_KIST_SUPPORT) */ 272 continue; 273 } 274 /* This flag will only get set in one of two cases: 275 * 1 - the kernel lost support for kist. In that case, we don't expect to 276 * ever end up here 277 * 2 - the consensus went from "yes use kist" to "no don't use kist". 278 * We might end up here if the consensus changes back to "yes", in which 279 * case we might want to warn the user again if it goes back to "no" 280 * yet again. 
Thus we unset the flag */ 281 have_logged_kist_suddenly_disabled = 0; 282 new_scheduler = get_kist_scheduler(); 283 scheduler_kist_set_full_mode(); 284 goto end; 285 case SCHEDULER_KIST_LITE: 286 new_scheduler = get_kist_scheduler(); 287 scheduler_kist_set_lite_mode(); 288 goto end; 289 case SCHEDULER_NONE: 290 FALLTHROUGH; 291 default: 292 /* Our option validation should have caught this. */ 293 tor_assert_unreached(); 294 } 295 } SMARTLIST_FOREACH_END(type); 296 297 end: 298 if (new_scheduler == NULL) { 299 log_err(LD_SCHED, "Tor was unable to select a scheduler type. Please " 300 "make sure Schedulers is correctly configured with " 301 "what Tor does support."); 302 /* We weren't able to choose a scheduler which means that none of the ones 303 * set in Schedulers are supported or usable. We will respect the user 304 * wishes of using what it has been configured and don't do a sneaky 305 * fallback. Because this can be changed at runtime, we have to stop tor 306 * right now. */ 307 exit(1); // XXXX bad exit 308 } 309 310 /* Set the chosen scheduler. */ 311 the_scheduler = new_scheduler; 312 } 313 314 /** 315 * Helper function called from a few different places. It changes the 316 * scheduler implementation, if necessary. And if it did, it then tells the 317 * old one to free its state and the new one to initialize. 318 */ 319 static void 320 set_scheduler(void) 321 { 322 const scheduler_t *old_scheduler = the_scheduler; 323 scheduler_types_t old_scheduler_type = SCHEDULER_NONE; 324 325 /* We keep track of the type in order to log only if the type switched. We 326 * can't just use the scheduler pointers because KIST and KISTLite share the 327 * same object. */ 328 if (the_scheduler) { 329 old_scheduler_type = the_scheduler->type; 330 } 331 332 /* From the options, select the scheduler type to set. 
*/ 333 select_scheduler(); 334 tor_assert(the_scheduler); 335 336 /* We look at the pointer difference in case the old sched and new sched 337 * share the same scheduler object, as is the case with KIST and KISTLite. */ 338 if (old_scheduler != the_scheduler) { 339 /* Allow the old scheduler to clean up, if needed. */ 340 if (old_scheduler && old_scheduler->free_all) { 341 old_scheduler->free_all(); 342 } 343 344 /* Initialize the new scheduler. */ 345 if (the_scheduler->init) { 346 the_scheduler->init(); 347 } 348 } 349 350 /* Finally we notice log if we switched schedulers. We use the type in case 351 * two schedulers share a scheduler object. */ 352 if (old_scheduler_type != the_scheduler->type) { 353 log_info(LD_CONFIG, "Scheduler type %s has been enabled.", 354 get_scheduler_type_string(the_scheduler->type)); 355 } 356 } 357 358 /***************************************************************************** 359 * Scheduling system private function definitions 360 * 361 * Functions that can only be accessed from scheduler*.c 362 *****************************************************************************/ 363 364 /** Returns human readable string for the given channel scheduler state. */ 365 const char * 366 get_scheduler_state_string(int scheduler_state) 367 { 368 switch (scheduler_state) { 369 case SCHED_CHAN_IDLE: 370 return "IDLE"; 371 case SCHED_CHAN_WAITING_FOR_CELLS: 372 return "WAITING_FOR_CELLS"; 373 case SCHED_CHAN_WAITING_TO_WRITE: 374 return "WAITING_TO_WRITE"; 375 case SCHED_CHAN_PENDING: 376 return "PENDING"; 377 default: 378 return "(invalid)"; 379 } 380 } 381 382 /** Helper that logs channel scheduler_state changes. Use this instead of 383 * setting scheduler_state directly. 
*/ 384 void 385 scheduler_set_channel_state(channel_t *chan, int new_state) 386 { 387 log_debug(LD_SCHED, "chan %" PRIu64 " changed from scheduler state %s to %s", 388 chan->global_identifier, 389 get_scheduler_state_string(chan->scheduler_state), 390 get_scheduler_state_string(new_state)); 391 chan->scheduler_state = new_state; 392 } 393 394 /** Return the pending channel list. */ 395 smartlist_t * 396 get_channels_pending(void) 397 { 398 return channels_pending; 399 } 400 401 /** Comparison function to use when sorting pending channels. */ 402 MOCK_IMPL(int, 403 scheduler_compare_channels, (const void *c1_v, const void *c2_v)) 404 { 405 const channel_t *c1 = NULL, *c2 = NULL; 406 /* These are a workaround for -Wbad-function-cast throwing a fit */ 407 const circuitmux_policy_t *p1, *p2; 408 uintptr_t p1_i, p2_i; 409 410 tor_assert(c1_v); 411 tor_assert(c2_v); 412 413 c1 = (const channel_t *)(c1_v); 414 c2 = (const channel_t *)(c2_v); 415 416 if (c1 != c2) { 417 if (circuitmux_get_policy(c1->cmux) == 418 circuitmux_get_policy(c2->cmux)) { 419 /* Same cmux policy, so use the mux comparison */ 420 return circuitmux_compare_muxes(c1->cmux, c2->cmux); 421 } else { 422 /* 423 * Different policies; not important to get this edge case perfect 424 * because the current code never actually gives different channels 425 * different cmux policies anyway. Just use this arbitrary but 426 * definite choice. 427 */ 428 p1 = circuitmux_get_policy(c1->cmux); 429 p2 = circuitmux_get_policy(c2->cmux); 430 p1_i = (uintptr_t)p1; 431 p2_i = (uintptr_t)p2; 432 433 return (p1_i < p2_i) ? -1 : 1; 434 } 435 } else { 436 /* c1 == c2, so always equal */ 437 return 0; 438 } 439 } 440 441 /***************************************************************************** 442 * Scheduling system global functions 443 * 444 * Functions that can be accessed from anywhere in Tor. 
445 *****************************************************************************/ 446 447 /** 448 * This is how the scheduling system is notified of Tor's configuration 449 * changing. For example: a SIGHUP was issued. 450 */ 451 void 452 scheduler_conf_changed(void) 453 { 454 /* Let the scheduler decide what it should do. */ 455 set_scheduler(); 456 457 /* Then tell the (possibly new) scheduler that we have new options. */ 458 if (the_scheduler->on_new_options) { 459 the_scheduler->on_new_options(); 460 } 461 } 462 463 /** 464 * Whenever we get a new consensus, this function is called. 465 */ 466 void 467 scheduler_notify_networkstatus_changed(void) 468 { 469 /* Maybe the consensus param made us change the scheduler. */ 470 set_scheduler(); 471 472 /* Then tell the (possibly new) scheduler that we have a new consensus */ 473 if (the_scheduler->on_new_consensus) { 474 the_scheduler->on_new_consensus(); 475 } 476 } 477 478 /** 479 * Free everything scheduling-related from main.c. Note this is only called 480 * when Tor is shutting down, while scheduler_t->free_all() is called both when 481 * Tor is shutting down and when we are switching schedulers. 482 */ 483 void 484 scheduler_free_all(void) 485 { 486 log_debug(LD_SCHED, "Shutting down scheduler"); 487 488 if (run_sched_ev) { 489 mainloop_event_free(run_sched_ev); 490 run_sched_ev = NULL; 491 } 492 493 if (channels_pending) { 494 /* We don't have ownership of the objects in this list. */ 495 smartlist_free(channels_pending); 496 channels_pending = NULL; 497 } 498 499 if (the_scheduler && the_scheduler->free_all) { 500 the_scheduler->free_all(); 501 } 502 the_scheduler = NULL; 503 } 504 505 /** Mark a channel as no longer ready to accept writes. 
506 * 507 * Possible state changes: 508 * - SCHED_CHAN_PENDING -> SCHED_CHAN_WAITING_TO_WRITE 509 * - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_IDLE 510 */ 511 MOCK_IMPL(void, 512 scheduler_channel_doesnt_want_writes,(channel_t *chan)) 513 { 514 IF_BUG_ONCE(!chan) { 515 return; 516 } 517 IF_BUG_ONCE(!channels_pending) { 518 return; 519 } 520 521 if (chan->scheduler_state == SCHED_CHAN_PENDING) { 522 /* 523 * It has cells but no longer can write, so it becomes 524 * SCHED_CHAN_WAITING_TO_WRITE. It's in channels_pending, so we 525 * should remove it from the list. 526 */ 527 smartlist_pqueue_remove(channels_pending, 528 scheduler_compare_channels, 529 offsetof(channel_t, sched_heap_idx), 530 chan); 531 scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE); 532 } else if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { 533 /* 534 * It does not have cells and no longer can write, so it becomes 535 * SCHED_CHAN_IDLE. 536 */ 537 scheduler_set_channel_state(chan, SCHED_CHAN_IDLE); 538 } 539 } 540 541 /** Mark a channel as having waiting cells. 542 * 543 * Possible state changes: 544 * - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_PENDING 545 * - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_TO_WRITE 546 */ 547 MOCK_IMPL(void, 548 scheduler_channel_has_waiting_cells,(channel_t *chan)) 549 { 550 IF_BUG_ONCE(!chan) { 551 return; 552 } 553 IF_BUG_ONCE(!channels_pending) { 554 return; 555 } 556 557 if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) { 558 /* 559 * It is able to write and now has cells, so it becomes 560 * SCHED_CHAN_PENDING. It must be added to the channels_pending 561 * list. 562 */ 563 scheduler_set_channel_state(chan, SCHED_CHAN_PENDING); 564 if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) { 565 smartlist_pqueue_add(channels_pending, 566 scheduler_compare_channels, 567 offsetof(channel_t, sched_heap_idx), 568 chan); 569 } 570 /* If we made a channel pending, we potentially have scheduling work to 571 * do. 
*/ 572 the_scheduler->schedule(); 573 } else if (chan->scheduler_state == SCHED_CHAN_IDLE) { 574 /* 575 * It is not able to write but now has cells, so it becomes 576 * SCHED_CHAN_WAITING_TO_WRITE. 577 */ 578 scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE); 579 } 580 } 581 582 /** Add the scheduler event to the set of pending events with next_run being 583 * the longest time libevent should wait before triggering the event. */ 584 void 585 scheduler_ev_add(const struct timeval *next_run) 586 { 587 tor_assert(run_sched_ev); 588 tor_assert(next_run); 589 if (BUG(mainloop_event_schedule(run_sched_ev, next_run) < 0)) { 590 log_warn(LD_SCHED, "Adding to libevent failed. Next run time was set to: " 591 "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec); 592 return; 593 } 594 } 595 596 /** Make the scheduler event active with the given flags. */ 597 void 598 scheduler_ev_active(void) 599 { 600 tor_assert(run_sched_ev); 601 mainloop_event_activate(run_sched_ev); 602 } 603 604 /* 605 * Initialize everything scheduling-related from config.c. Note this is only 606 * called when Tor is starting up, while scheduler_t->init() is called both 607 * when Tor is starting up and when we are switching schedulers. 608 */ 609 void 610 scheduler_init(void) 611 { 612 log_debug(LD_SCHED, "Initting scheduler"); 613 614 // Two '!' because we really do want to check if the pointer is non-NULL 615 IF_BUG_ONCE(!!run_sched_ev) { 616 log_warn(LD_SCHED, "We should not already have a libevent scheduler event." 617 "I'll clean the old one up, but this is odd."); 618 mainloop_event_free(run_sched_ev); 619 run_sched_ev = NULL; 620 } 621 run_sched_ev = mainloop_event_new(scheduler_evt_callback, NULL); 622 channels_pending = smartlist_new(); 623 624 set_scheduler(); 625 } 626 627 /* 628 * If a channel is going away, this is how the scheduling system is informed 629 * so it can do any freeing necessary. 
This ultimately calls 630 * scheduler_t->on_channel_free() so the current scheduler can release any 631 * state specific to this channel. 632 */ 633 MOCK_IMPL(void, 634 scheduler_release_channel,(channel_t *chan)) 635 { 636 IF_BUG_ONCE(!chan) { 637 return; 638 } 639 IF_BUG_ONCE(!channels_pending) { 640 return; 641 } 642 643 /* Try to remove the channel from the pending list regardless of its 644 * scheduler state. We can release a channel in many places in the tor code 645 * so we can't rely on the channel state (PENDING) to remove it from the 646 * list. 647 * 648 * For instance, the channel can change state from OPEN to CLOSING while 649 * being handled in the scheduler loop leading to the channel being in 650 * PENDING state but not in the pending list. Furthermore, we release the 651 * channel when it changes state to close and a second time when we free it. 652 * Not ideal at all but for now that is the way it is. */ 653 if (chan->sched_heap_idx != -1) { 654 smartlist_pqueue_remove(channels_pending, 655 scheduler_compare_channels, 656 offsetof(channel_t, sched_heap_idx), 657 chan); 658 } 659 660 if (the_scheduler->on_channel_free) { 661 the_scheduler->on_channel_free(chan); 662 } 663 scheduler_set_channel_state(chan, SCHED_CHAN_IDLE); 664 } 665 666 /** Mark a channel as ready to accept writes. 667 * Possible state changes: 668 * 669 * - SCHED_CHAN_WAITING_TO_WRITE -> SCHED_CHAN_PENDING 670 * - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_FOR_CELLS 671 */ 672 void 673 scheduler_channel_wants_writes(channel_t *chan) 674 { 675 IF_BUG_ONCE(!chan) { 676 return; 677 } 678 IF_BUG_ONCE(!channels_pending) { 679 return; 680 } 681 682 if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) { 683 /* 684 * It has cells and can now write, so it becomes 685 * SCHED_CHAN_PENDING. It must be added to the channels_pending 686 * list. 
687 */ 688 scheduler_set_channel_state(chan, SCHED_CHAN_PENDING); 689 if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) { 690 smartlist_pqueue_add(channels_pending, 691 scheduler_compare_channels, 692 offsetof(channel_t, sched_heap_idx), 693 chan); 694 } 695 /* We just made a channel pending, we have scheduling work to do. */ 696 the_scheduler->schedule(); 697 } else if (chan->scheduler_state == SCHED_CHAN_IDLE) { 698 /* 699 * It does not have cells but can now write, so it becomes 700 * SCHED_CHAN_WAITING_FOR_CELLS. 701 */ 702 scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS); 703 } 704 } 705 706 /* Log warn the given channel and extra scheduler context as well. This is 707 * used by SCHED_BUG() in order to be able to extract as much information as 708 * we can when we hit a bug. Channel chan can be NULL. */ 709 void 710 scheduler_bug_occurred(const channel_t *chan) 711 { 712 char buf[128]; 713 714 if (chan != NULL) { 715 const size_t outbuf_len = 716 buf_datalen(TO_CONN(CONST_BASE_CHAN_TO_TLS(chan)->conn)->outbuf); 717 tor_snprintf(buf, sizeof(buf), 718 "Channel %" PRIu64 " in state %s and scheduler state %s." 719 " Num cells on cmux: %d. Connection outbuf len: %lu.", 720 chan->global_identifier, 721 channel_state_to_string(chan->state), 722 get_scheduler_state_string(chan->scheduler_state), 723 circuitmux_num_cells(chan->cmux), 724 (unsigned long)outbuf_len); 725 } 726 727 { 728 char *msg; 729 /* Rate limit every 60 seconds. If we start seeing this every 60 sec, we 730 * know something is stuck/wrong. It *should* be loud but not too much. */ 731 static ratelim_t rlimit = RATELIM_INIT(60); 732 if ((msg = rate_limit_log(&rlimit, approx_time()))) { 733 log_warn(LD_BUG, "%s Num pending channels: %d. " 734 "Channel in pending list: %s.%s", 735 (chan != NULL) ? buf : "No channel in bug context.", 736 smartlist_len(channels_pending), 737 (smartlist_pos(channels_pending, chan) == -1) ? 
"no" : "yes", 738 msg); 739 tor_free(msg); 740 } 741 } 742 } 743 744 #ifdef TOR_UNIT_TESTS 745 746 /* 747 * Notify scheduler that a channel's queue position may have changed. 748 */ 749 void 750 scheduler_touch_channel(channel_t *chan) 751 { 752 IF_BUG_ONCE(!chan) { 753 return; 754 } 755 756 if (chan->scheduler_state == SCHED_CHAN_PENDING) { 757 /* Remove and re-add it */ 758 smartlist_pqueue_remove(channels_pending, 759 scheduler_compare_channels, 760 offsetof(channel_t, sched_heap_idx), 761 chan); 762 smartlist_pqueue_add(channels_pending, 763 scheduler_compare_channels, 764 offsetof(channel_t, sched_heap_idx), 765 chan); 766 } 767 /* else no-op, since it isn't in the queue */ 768 } 769 770 #endif /* defined(TOR_UNIT_TESTS) */