tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

scheduler.c (26216B)


      1 /* Copyright (c) 2013-2021, The Tor Project, Inc. */
      2 /* See LICENSE for licensing information */
      3 
      4 #include "core/or/or.h"
      5 #include "app/config/config.h"
      6 
      7 #include "lib/evloop/compat_libevent.h"
      8 #define SCHEDULER_PRIVATE
      9 #define SCHEDULER_KIST_PRIVATE
     10 #include "core/or/scheduler.h"
     11 #include "core/mainloop/mainloop.h"
     12 #include "lib/buf/buffers.h"
     13 #define CHANNEL_OBJECT_PRIVATE
     14 #include "core/or/channeltls.h"
     15 
     16 #include "core/or/or_connection_st.h"
     17 
     18 /**
     19 * \file scheduler.c
     20 * \brief Channel scheduling system: decides which channels should send and
     21 * receive when.
     22 *
     23 * This module is the global/common parts of the scheduling system. This system
     24 * is what decides what channels get to send cells on their circuits and when.
     25 *
     26 * Terms:
     27 * - "Scheduling system": the collection of scheduler*.{h,c} files and their
     28 *   aggregate behavior.
     29 * - "Scheduler implementation": a scheduler_t. The scheduling system has one
     30 *   active scheduling implementation at a time.
     31 *
     32 * In this file you will find state that any scheduler implementation can have
     33 * access to as well as the functions the rest of Tor uses to interact with the
     34 * scheduling system.
     35 *
     36 * The earliest versions of Tor approximated a kind of round-robin system
     37 * among active connections, but only approximated it. It would only consider
     38 * one connection (roughly equal to a channel in today's terms) at a time, and
     39 * thus could only prioritize circuits against others on the same connection.
     40 *
     41 * Then in response to the KIST paper[0], Tor implemented a global
     42 * circuit scheduler. It was supposed to prioritize circuits across many
     43 * channels, but wasn't effective. It is preserved in scheduler_vanilla.c.
     44 *
     45 * [0]: https://www.robgjansen.com/publications/kist-sec2014.pdf
     46 *
     47 * Then we actually got around to implementing KIST for real. We decided to
     48 * modularize the scheduler so new ones can be implemented. You can find KIST
     49 * in scheduler_kist.c.
     50 *
     51 * Channels have one of four scheduling states based on whether or not they
     52 * have cells to send and whether or not they are able to send.
     53 *
     54 * <ol>
     55 * <li>
     56 *   Not open for writes, no cells to send.
     57 *     <ul><li> Not much to do here, and the channel will have scheduler_state
     58 *       == SCHED_CHAN_IDLE
     59 *     <li> Transitions from:
     60 *       <ul>
     61 *       <li>Open for writes/has cells by simultaneously draining all circuit
     62 *         queues and filling the output buffer.
     63 *       </ul>
     64 *     <li> Transitions to:
     65 *      <ul>
     66 *       <li> Not open for writes/has cells by arrival of cells on an attached
     67 *         circuit (this would be driven from append_cell_to_circuit_queue())
     68 *       <li> Open for writes/no cells by a channel type specific path;
     69 *         driven from connection_or_flushed_some() for channel_tls_t.
     70 *      </ul>
     71 *    </ul>
     72 *
     73 * <li> Open for writes, no cells to send
     74 *   <ul>
     75 *     <li>Not much here either; this will be the state an idle but open
     76 *       channel can be expected to settle in.  It will have scheduler_state
     77 *       == SCHED_CHAN_WAITING_FOR_CELLS
     78 *     <li> Transitions from:
     79 *       <ul>
     80 *       <li>Not open for writes/no cells by flushing some of the output
     81 *         buffer.
     82 *       <li>Open for writes/has cells by the scheduler moving cells from
     83 *         circuit queues to channel output queue, but not having enough
     84 *         to fill the output queue.
     85 *       </ul>
     86 *     <li> Transitions to:
     87 *       <ul>
     88 *        <li>Open for writes/has cells by arrival of new cells on an attached
     89 *         circuit, in append_cell_to_circuit_queue()
     90 *       </ul>
     91 *     </ul>
     92 *
     93 * <li>Not open for writes, cells to send
     94 *     <ul>
     95 *     <li>This is the state of a busy circuit limited by output bandwidth;
     96 *       cells have piled up in the circuit queues waiting to be relayed.
     97 *       The channel will have scheduler_state == SCHED_CHAN_WAITING_TO_WRITE.
     98 *     <li> Transitions from:
     99 *       <ul>
    100 *       <li>Not open for writes/no cells by arrival of cells on an attached
    101 *         circuit
    102 *       <li>Open for writes/has cells by filling an output buffer without
    103 *         draining all cells from attached circuits
    104 *       </ul>
    105 *    <li> Transitions to:
    106 *       <ul>
    107 *       <li>Open for writes/has cells by draining some of the output buffer
    108 *         via the connection_or_flushed_some() path (for channel_tls_t).
    109 *       </ul>
    110 *    </ul>
    111 *
    112 * <li>Open for writes, cells to send
    113 *     <ul>
    114 *     <li>This connection is ready to relay some cells and waiting for
    115 *       the scheduler to choose it.  The channel will have scheduler_state ==
    116 *       SCHED_CHAN_PENDING.
    117 *     <li>Transitions from:
    118 *       <ul>
    119 *       <li>Not open for writes/has cells by the connection_or_flushed_some()
    120 *         path
    121 *       <li>Open for writes/no cells by the append_cell_to_circuit_queue()
    122 *         path
    123 *       </ul>
    124 *     <li> Transitions to:
    125 *       <ul>
    126 *        <li>Not open for writes/no cells by draining all circuit queues and
    127 *          simultaneously filling the output buffer.
    128 *        <li>Not open for writes/has cells by writing enough cells to fill the
    129 *         output buffer
    130 *        <li>Open for writes/no cells by draining all attached circuit queues
    131 *         without also filling the output buffer
    132 *       </ul>
    133 *    </ul>
    134 * </ol>
    135 *
    136 * Other event-driven parts of the code move channels between these scheduling
    137 * states by calling scheduler functions. The scheduling system builds up a
    138 * list of channels in the SCHED_CHAN_PENDING state that the scheduler
    139 * implementation should then use when it runs. Scheduling implementations need
    140 * to properly update channel states during their scheduler_t->run() function
    141 * as that is the only opportunity for channels to move from SCHED_CHAN_PENDING
    142 * to any other state.
    143 *
    144 * The remainder of this file is a small amount of state that any scheduler
    145 * implementation should have access to, and the functions the rest of Tor uses
    146 * to interact with the scheduling system.
    147 */
    148 
    149 /*****************************************************************************
    150 * Scheduling system state
    151 *
    152 * State that can be accessed from any scheduler implementation (but not
    153 * outside the scheduling system)
    154 *****************************************************************************/
    155 
    156 /** The active scheduler implementation; NULL until set_scheduler() runs. */
    157 STATIC const scheduler_t *the_scheduler;
    158 
    159 /**
    160 * We keep a list of channels that are pending - i.e, have cells to write
    161 * and can accept them to send. The enum scheduler_state in channel_t
    162 * is reserved for our use.
    163 *
    164 * Priority queue of channels that can write and have cells (pending work)
    165 */
    166 STATIC smartlist_t *channels_pending = NULL;
    167 
    168 /**
    169 * This event runs the scheduler from its callback, and is manually
    170 * activated whenever a channel enters open for writes/cells to send.
    171 */
    172 STATIC struct mainloop_event_t *run_sched_ev = NULL;
    173 
    174 static int have_logged_kist_suddenly_disabled = 0;
    175 
    176 /*****************************************************************************
    177 * Scheduling system static function definitions
    178 *
    179 * Functions that can only be accessed from this file.
    180 *****************************************************************************/
    181 
    182 /** Return a human readable string for the given scheduler type. */
    183 static const char *
    184 get_scheduler_type_string(scheduler_types_t type)
    185 {
    186  switch (type) {
    187  case SCHEDULER_VANILLA:
    188    return "Vanilla";
    189  case SCHEDULER_KIST:
    190    return "KIST";
    191  case SCHEDULER_KIST_LITE:
    192    return "KISTLite";
    193  case SCHEDULER_NONE:
    194    FALLTHROUGH;
    195  default:
    196    tor_assert_unreached();
    197    return "(N/A)";
    198  }
    199 }
    200 
    201 /**
    202 * Scheduler event callback; this should get triggered once per event loop
    203 * if any scheduling work was created during the event loop.
    204 */
    205 static void
    206 scheduler_evt_callback(mainloop_event_t *event, void *arg)
    207 {
    208  (void) event;
    209  (void) arg;
    210 
    211  log_debug(LD_SCHED, "Scheduler event callback called");
    212 
    213  /* Run the scheduler. This is a mandatory function. */
    214 
    215  /* We might as well assert on this. If this function doesn't exist, no cells
    216   * are getting scheduled. Things are very broken. scheduler_t says the run()
    217   * function is mandatory. */
    218  tor_assert(the_scheduler->run);
    219  the_scheduler->run();
    220 
    221  /* Schedule itself back in if it has more work. */
    222 
    223  /* Again, might as well assert on this mandatory scheduler_t function. If it
    224   * doesn't exist, there's no way to tell libevent to run the scheduler again
    225   * in the future. */
    226  tor_assert(the_scheduler->schedule);
    227  the_scheduler->schedule();
    228 }
    229 
    230 /** Using the global options, select the scheduler we should be using. */
    231 static void
    232 select_scheduler(void)
    233 {
    234  scheduler_t *new_scheduler = NULL;
    235 
    236 #ifdef TOR_UNIT_TESTS
    237  /* This is hella annoying to set in the options for every test that passes
    238   * through the scheduler and there are many so if we don't explicitly have
    239   * a list of types set, just put the vanilla one. */
    240  if (get_options()->SchedulerTypes_ == NULL) {
    241    the_scheduler = get_vanilla_scheduler();
    242    return;
    243  }
    244 #endif /* defined(TOR_UNIT_TESTS) */
    245 
    246  /* This list is ordered that is first entry has the first priority. Thus, as
    247   * soon as we find a scheduler type that we can use, we use it and stop. */
    248  SMARTLIST_FOREACH_BEGIN(get_options()->SchedulerTypes_, int *, type) {
    249    switch (*type) {
    250    case SCHEDULER_VANILLA:
    251      new_scheduler = get_vanilla_scheduler();
    252      goto end;
    253    case SCHEDULER_KIST:
    254      if (!scheduler_can_use_kist()) {
    255 #ifdef HAVE_KIST_SUPPORT
    256        if (!have_logged_kist_suddenly_disabled) {
    257          /* We should only log this once in most cases. If it was the kernel
    258           * losing support for kist that caused scheduler_can_use_kist() to
    259           * return false, then this flag makes sure we only log this message
    260           * once. If it was the consensus that switched from "yes use kist"
    261           * to "no don't use kist", then we still set the flag so we log
    262           * once, but we unset the flag elsewhere if we ever can_use_kist()
    263           * again.
    264           */
    265          have_logged_kist_suddenly_disabled = 1;
    266          log_notice(LD_SCHED, "Scheduler type KIST has been disabled by "
    267                               "the consensus or no kernel support.");
    268        }
    269 #else /* !defined(HAVE_KIST_SUPPORT) */
    270        log_info(LD_SCHED, "Scheduler type KIST not built in");
    271 #endif /* defined(HAVE_KIST_SUPPORT) */
    272        continue;
    273      }
    274      /* This flag will only get set in one of two cases:
    275       * 1 - the kernel lost support for kist. In that case, we don't expect to
    276       *     ever end up here
    277       * 2 - the consensus went from "yes use kist" to "no don't use kist".
    278       * We might end up here if the consensus changes back to "yes", in which
    279       * case we might want to warn the user again if it goes back to "no"
    280       * yet again. Thus we unset the flag */
    281      have_logged_kist_suddenly_disabled = 0;
    282      new_scheduler = get_kist_scheduler();
    283      scheduler_kist_set_full_mode();
    284      goto end;
    285    case SCHEDULER_KIST_LITE:
    286      new_scheduler = get_kist_scheduler();
    287      scheduler_kist_set_lite_mode();
    288      goto end;
    289    case SCHEDULER_NONE:
    290      FALLTHROUGH;
    291    default:
    292      /* Our option validation should have caught this. */
    293      tor_assert_unreached();
    294    }
    295  } SMARTLIST_FOREACH_END(type);
    296 
    297 end:
    298  if (new_scheduler == NULL) {
    299    log_err(LD_SCHED, "Tor was unable to select a scheduler type. Please "
    300                      "make sure Schedulers is correctly configured with "
    301                      "what Tor does support.");
    302    /* We weren't able to choose a scheduler which means that none of the ones
    303     * set in Schedulers are supported or usable. We will respect the user
    304     * wishes of using what it has been configured and don't do a sneaky
    305     * fallback. Because this can be changed at runtime, we have to stop tor
    306     * right now. */
    307    exit(1); // XXXX bad exit
    308  }
    309 
    310  /* Set the chosen scheduler. */
    311  the_scheduler = new_scheduler;
    312 }
    313 
    314 /**
    315 * Helper function called from a few different places. It changes the
    316 * scheduler implementation, if necessary. And if it did, it then tells the
    317 * old one to free its state and the new one to initialize.
    318 */
    319 static void
    320 set_scheduler(void)
    321 {
    322  const scheduler_t *old_scheduler = the_scheduler;
    323  scheduler_types_t old_scheduler_type = SCHEDULER_NONE;
    324 
    325  /* We keep track of the type in order to log only if the type switched. We
    326   * can't just use the scheduler pointers because KIST and KISTLite share the
    327   * same object. */
    328  if (the_scheduler) {
    329    old_scheduler_type = the_scheduler->type;
    330  }
    331 
    332  /* From the options, select the scheduler type to set. */
    333  select_scheduler();
    334  tor_assert(the_scheduler);
    335 
    336  /* We look at the pointer difference in case the old sched and new sched
    337   * share the same scheduler object, as is the case with KIST and KISTLite. */
    338  if (old_scheduler != the_scheduler) {
    339    /* Allow the old scheduler to clean up, if needed. */
    340    if (old_scheduler && old_scheduler->free_all) {
    341      old_scheduler->free_all();
    342    }
    343 
    344    /* Initialize the new scheduler. */
    345    if (the_scheduler->init) {
    346      the_scheduler->init();
    347    }
    348  }
    349 
    350  /* Finally we notice log if we switched schedulers. We use the type in case
    351   * two schedulers share a scheduler object. */
    352  if (old_scheduler_type != the_scheduler->type) {
    353    log_info(LD_CONFIG, "Scheduler type %s has been enabled.",
    354             get_scheduler_type_string(the_scheduler->type));
    355  }
    356 }
    357 
    358 /*****************************************************************************
    359 * Scheduling system private function definitions
    360 *
    361 * Functions that can only be accessed from scheduler*.c
    362 *****************************************************************************/
    363 
    364 /** Returns human readable string for the given channel scheduler state. */
    365 const char *
    366 get_scheduler_state_string(int scheduler_state)
    367 {
    368  switch (scheduler_state) {
    369  case SCHED_CHAN_IDLE:
    370    return "IDLE";
    371  case SCHED_CHAN_WAITING_FOR_CELLS:
    372    return "WAITING_FOR_CELLS";
    373  case SCHED_CHAN_WAITING_TO_WRITE:
    374    return "WAITING_TO_WRITE";
    375  case SCHED_CHAN_PENDING:
    376    return "PENDING";
    377  default:
    378    return "(invalid)";
    379  }
    380 }
    381 
    382 /** Helper that logs channel scheduler_state changes. Use this instead of
    383 * setting scheduler_state directly. */
    384 void
    385 scheduler_set_channel_state(channel_t *chan, int new_state)
    386 {
    387  log_debug(LD_SCHED, "chan %" PRIu64 " changed from scheduler state %s to %s",
    388      chan->global_identifier,
    389      get_scheduler_state_string(chan->scheduler_state),
    390      get_scheduler_state_string(new_state));
    391  chan->scheduler_state = new_state;
    392 }
    393 
    394 /** Return the pending channel list. */
    395 smartlist_t *
    396 get_channels_pending(void)
    397 {
    398  return channels_pending;
    399 }
    400 
    401 /** Comparison function to use when sorting pending channels. */
    402 MOCK_IMPL(int,
    403 scheduler_compare_channels, (const void *c1_v, const void *c2_v))
    404 {
    405  const channel_t *c1 = NULL, *c2 = NULL;
    406  /* These are a workaround for -Wbad-function-cast throwing a fit */
    407  const circuitmux_policy_t *p1, *p2;
    408  uintptr_t p1_i, p2_i;
    409 
    410  tor_assert(c1_v);
    411  tor_assert(c2_v);
    412 
    413  c1 = (const channel_t *)(c1_v);
    414  c2 = (const channel_t *)(c2_v);
    415 
    416  if (c1 != c2) {
    417    if (circuitmux_get_policy(c1->cmux) ==
    418        circuitmux_get_policy(c2->cmux)) {
    419      /* Same cmux policy, so use the mux comparison */
    420      return circuitmux_compare_muxes(c1->cmux, c2->cmux);
    421    } else {
    422      /*
    423       * Different policies; not important to get this edge case perfect
    424       * because the current code never actually gives different channels
    425       * different cmux policies anyway.  Just use this arbitrary but
    426       * definite choice.
    427       */
    428      p1 = circuitmux_get_policy(c1->cmux);
    429      p2 = circuitmux_get_policy(c2->cmux);
    430      p1_i = (uintptr_t)p1;
    431      p2_i = (uintptr_t)p2;
    432 
    433      return (p1_i < p2_i) ? -1 : 1;
    434    }
    435  } else {
    436    /* c1 == c2, so always equal */
    437    return 0;
    438  }
    439 }
    440 
    441 /*****************************************************************************
    442 * Scheduling system global functions
    443 *
    444 * Functions that can be accessed from anywhere in Tor.
    445 *****************************************************************************/
    446 
    447 /**
    448 * This is how the scheduling system is notified of Tor's configuration
    449 * changing. For example: a SIGHUP was issued.
    450 */
    451 void
    452 scheduler_conf_changed(void)
    453 {
    454  /* Let the scheduler decide what it should do. */
    455  set_scheduler();
    456 
    457  /* Then tell the (possibly new) scheduler that we have new options. */
    458  if (the_scheduler->on_new_options) {
    459    the_scheduler->on_new_options();
    460  }
    461 }
    462 
    463 /**
    464 * Whenever we get a new consensus, this function is called.
    465 */
    466 void
    467 scheduler_notify_networkstatus_changed(void)
    468 {
    469  /* Maybe the consensus param made us change the scheduler. */
    470  set_scheduler();
    471 
    472  /* Then tell the (possibly new) scheduler that we have a new consensus */
    473  if (the_scheduler->on_new_consensus) {
    474    the_scheduler->on_new_consensus();
    475  }
    476 }
    477 
    478 /**
    479 * Free everything scheduling-related from main.c. Note this is only called
    480 * when Tor is shutting down, while scheduler_t->free_all() is called both when
    481 * Tor is shutting down and when we are switching schedulers.
    482 */
    483 void
    484 scheduler_free_all(void)
    485 {
    486  log_debug(LD_SCHED, "Shutting down scheduler");
    487 
    488  if (run_sched_ev) {
    489    mainloop_event_free(run_sched_ev);
    490    run_sched_ev = NULL;
    491  }
    492 
    493  if (channels_pending) {
    494    /* We don't have ownership of the objects in this list. */
    495    smartlist_free(channels_pending);
    496    channels_pending = NULL;
    497  }
    498 
    499  if (the_scheduler && the_scheduler->free_all) {
    500    the_scheduler->free_all();
    501  }
    502  the_scheduler = NULL;
    503 }
    504 
    505 /** Mark a channel as no longer ready to accept writes.
    506  *
    507  * Possible state changes:
    508  *  - SCHED_CHAN_PENDING -> SCHED_CHAN_WAITING_TO_WRITE
    509  *  - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_IDLE
    510  */
    511 MOCK_IMPL(void,
    512 scheduler_channel_doesnt_want_writes,(channel_t *chan))
    513 {
    514  IF_BUG_ONCE(!chan) {
    515    return;
    516  }
    517  IF_BUG_ONCE(!channels_pending) {
    518    return;
    519  }
    520 
    521  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
    522    /*
    523     * It has cells but no longer can write, so it becomes
    524     * SCHED_CHAN_WAITING_TO_WRITE. It's in channels_pending, so we
    525     * should remove it from the list.
    526     */
    527    smartlist_pqueue_remove(channels_pending,
    528                            scheduler_compare_channels,
    529                            offsetof(channel_t, sched_heap_idx),
    530                            chan);
    531    scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE);
    532  } else if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
    533    /*
    534     * It does not have cells and no longer can write, so it becomes
    535     * SCHED_CHAN_IDLE.
    536     */
    537    scheduler_set_channel_state(chan, SCHED_CHAN_IDLE);
    538  }
    539 }
    540 
    541 /** Mark a channel as having waiting cells.
    542  *
    543  * Possible state changes:
    544  *  - SCHED_CHAN_WAITING_FOR_CELLS -> SCHED_CHAN_PENDING
    545  *  - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_TO_WRITE
    546  */
    547 MOCK_IMPL(void,
    548 scheduler_channel_has_waiting_cells,(channel_t *chan))
    549 {
    550  IF_BUG_ONCE(!chan) {
    551    return;
    552  }
    553  IF_BUG_ONCE(!channels_pending) {
    554    return;
    555  }
    556 
    557  if (chan->scheduler_state == SCHED_CHAN_WAITING_FOR_CELLS) {
    558    /*
    559     * It is able to write and now has cells, so it becomes
    560     * SCHED_CHAN_PENDING. It must be added to the channels_pending
    561     * list.
    562     */
    563    scheduler_set_channel_state(chan, SCHED_CHAN_PENDING);
    564    if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) {
    565      smartlist_pqueue_add(channels_pending,
    566                           scheduler_compare_channels,
    567                           offsetof(channel_t, sched_heap_idx),
    568                           chan);
    569    }
    570    /* If we made a channel pending, we potentially have scheduling work to
    571     * do. */
    572    the_scheduler->schedule();
    573  } else if (chan->scheduler_state == SCHED_CHAN_IDLE) {
    574    /*
    575     * It is not able to write but now has cells, so it becomes
    576     * SCHED_CHAN_WAITING_TO_WRITE.
    577     */
    578    scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_TO_WRITE);
    579  }
    580 }
    581 
    582 /** Add the scheduler event to the set of pending events with next_run being
    583 * the longest time libevent should wait before triggering the event. */
    584 void
    585 scheduler_ev_add(const struct timeval *next_run)
    586 {
    587  tor_assert(run_sched_ev);
    588  tor_assert(next_run);
    589  if (BUG(mainloop_event_schedule(run_sched_ev, next_run) < 0)) {
    590    log_warn(LD_SCHED, "Adding to libevent failed. Next run time was set to: "
    591                       "%ld.%06ld", next_run->tv_sec, (long)next_run->tv_usec);
    592    return;
    593  }
    594 }
    595 
    596 /** Make the scheduler event active with the given flags. */
    597 void
    598 scheduler_ev_active(void)
    599 {
    600  tor_assert(run_sched_ev);
    601  mainloop_event_activate(run_sched_ev);
    602 }
    603 
    604 /*
    605 * Initialize everything scheduling-related from config.c. Note this is only
    606 * called when Tor is starting up, while scheduler_t->init() is called both
    607 * when Tor is starting up and when we are switching schedulers.
    608 */
    609 void
    610 scheduler_init(void)
    611 {
    612  log_debug(LD_SCHED, "Initting scheduler");
    613 
    614  // Two '!' because we really do want to check if the pointer is non-NULL
    615  IF_BUG_ONCE(!!run_sched_ev) {
    616    log_warn(LD_SCHED, "We should not already have a libevent scheduler event."
    617             "I'll clean the old one up, but this is odd.");
    618    mainloop_event_free(run_sched_ev);
    619    run_sched_ev = NULL;
    620  }
    621  run_sched_ev = mainloop_event_new(scheduler_evt_callback, NULL);
    622  channels_pending = smartlist_new();
    623 
    624  set_scheduler();
    625 }
    626 
    627 /*
    628 * If a channel is going away, this is how the scheduling system is informed
    629 * so it can do any freeing necessary. This ultimately calls
    630 * scheduler_t->on_channel_free() so the current scheduler can release any
    631 * state specific to this channel.
    632 */
    633 MOCK_IMPL(void,
    634 scheduler_release_channel,(channel_t *chan))
    635 {
    636  IF_BUG_ONCE(!chan) {
    637    return;
    638  }
    639  IF_BUG_ONCE(!channels_pending) {
    640    return;
    641  }
    642 
    643  /* Try to remove the channel from the pending list regardless of its
    644   * scheduler state. We can release a channel in many places in the tor code
    645   * so we can't rely on the channel state (PENDING) to remove it from the
    646   * list.
    647   *
    648   * For instance, the channel can change state from OPEN to CLOSING while
    649   * being handled in the scheduler loop leading to the channel being in
    650   * PENDING state but not in the pending list. Furthermore, we release the
    651   * channel when it changes state to close and a second time when we free it.
    652   * Not ideal at all but for now that is the way it is. */
    653  if (chan->sched_heap_idx != -1) {
    654    smartlist_pqueue_remove(channels_pending,
    655                            scheduler_compare_channels,
    656                            offsetof(channel_t, sched_heap_idx),
    657                            chan);
    658  }
    659 
    660  if (the_scheduler->on_channel_free) {
    661    the_scheduler->on_channel_free(chan);
    662  }
    663  scheduler_set_channel_state(chan, SCHED_CHAN_IDLE);
    664 }
    665 
    666 /** Mark a channel as ready to accept writes.
    667  * Possible state changes:
    668  *
    669  *  - SCHED_CHAN_WAITING_TO_WRITE -> SCHED_CHAN_PENDING
    670  *  - SCHED_CHAN_IDLE -> SCHED_CHAN_WAITING_FOR_CELLS
    671  */
    672 void
    673 scheduler_channel_wants_writes(channel_t *chan)
    674 {
    675  IF_BUG_ONCE(!chan) {
    676    return;
    677  }
    678  IF_BUG_ONCE(!channels_pending) {
    679    return;
    680  }
    681 
    682  if (chan->scheduler_state == SCHED_CHAN_WAITING_TO_WRITE) {
    683    /*
    684     * It has cells and can now write, so it becomes
    685     * SCHED_CHAN_PENDING. It must be added to the channels_pending
    686     * list.
    687     */
    688    scheduler_set_channel_state(chan, SCHED_CHAN_PENDING);
    689    if (!SCHED_BUG(chan->sched_heap_idx != -1, chan)) {
    690      smartlist_pqueue_add(channels_pending,
    691                           scheduler_compare_channels,
    692                           offsetof(channel_t, sched_heap_idx),
    693                           chan);
    694    }
    695    /* We just made a channel pending, we have scheduling work to do. */
    696    the_scheduler->schedule();
    697  } else if (chan->scheduler_state == SCHED_CHAN_IDLE) {
    698    /*
    699     * It does not have cells but can now write, so it becomes
    700     * SCHED_CHAN_WAITING_FOR_CELLS.
    701     */
    702    scheduler_set_channel_state(chan, SCHED_CHAN_WAITING_FOR_CELLS);
    703  }
    704 }
    705 
    706 /* Log warn the given channel and extra scheduler context as well. This is
    707 * used by SCHED_BUG() in order to be able to extract as much information as
    708 * we can when we hit a bug. Channel chan can be NULL. */
    709 void
    710 scheduler_bug_occurred(const channel_t *chan)
    711 {
    712  char buf[128];
    713 
    714  if (chan != NULL) {
    715    const size_t outbuf_len =
    716      buf_datalen(TO_CONN(CONST_BASE_CHAN_TO_TLS(chan)->conn)->outbuf);
    717    tor_snprintf(buf, sizeof(buf),
    718                 "Channel %" PRIu64 " in state %s and scheduler state %s."
    719                 " Num cells on cmux: %d. Connection outbuf len: %lu.",
    720                 chan->global_identifier,
    721                 channel_state_to_string(chan->state),
    722                 get_scheduler_state_string(chan->scheduler_state),
    723                 circuitmux_num_cells(chan->cmux),
    724                 (unsigned long)outbuf_len);
    725  }
    726 
    727  {
    728    char *msg;
    729    /* Rate limit every 60 seconds. If we start seeing this every 60 sec, we
    730     * know something is stuck/wrong. It *should* be loud but not too much. */
    731    static ratelim_t rlimit = RATELIM_INIT(60);
    732    if ((msg = rate_limit_log(&rlimit, approx_time()))) {
    733      log_warn(LD_BUG, "%s Num pending channels: %d. "
    734                       "Channel in pending list: %s.%s",
    735               (chan != NULL) ? buf : "No channel in bug context.",
    736               smartlist_len(channels_pending),
    737               (smartlist_pos(channels_pending, chan) == -1) ? "no" : "yes",
    738               msg);
    739      tor_free(msg);
    740    }
    741  }
    742 }
    743 
    744 #ifdef TOR_UNIT_TESTS
    745 
    746 /*
    747 * Notify scheduler that a channel's queue position may have changed.
    748 */
    749 void
    750 scheduler_touch_channel(channel_t *chan)
    751 {
    752  IF_BUG_ONCE(!chan) {
    753    return;
    754  }
    755 
    756  if (chan->scheduler_state == SCHED_CHAN_PENDING) {
    757    /* Remove and re-add it */
    758    smartlist_pqueue_remove(channels_pending,
    759                            scheduler_compare_channels,
    760                            offsetof(channel_t, sched_heap_idx),
    761                            chan);
    762    smartlist_pqueue_add(channels_pending,
    763                         scheduler_compare_channels,
    764                         offsetof(channel_t, sched_heap_idx),
    765                         chan);
    766  }
    767  /* else no-op, since it isn't in the queue */
    768 }
    769 
    770 #endif /* defined(TOR_UNIT_TESTS) */