tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

parallel.rs (7531B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 //! Implements parallel traversal over the DOM tree.
      6 //!
      7 //! This traversal is based on Rayon, and therefore its safety is largely
      8 //! verified by the type system.
      9 //!
     10 //! The primary trickiness and fine print for the above relates to the
     11 //! thread safety of the DOM nodes themselves. Accessing a DOM element
     12 //! concurrently on multiple threads is actually mostly "safe", since all
     13 //! the mutable state is protected by an AtomicRefCell, and so we'll
     14 //! generally panic if something goes wrong. Still, we try to to enforce our
     15 //! thread invariants at compile time whenever possible. As such, TNode and
     16 //! TElement are not Send, so ordinary style system code cannot accidentally
     17 //! share them with other threads. In the parallel traversal, we explicitly
     18 //! invoke |unsafe { SendNode::new(n) }| to put nodes in containers that may
     19 //! be sent to other threads. This occurs in only a handful of places and is
     20 //! easy to grep for. At the time of this writing, there is no other unsafe
     21 //! code in the parallel traversal.
     22 
     23 #![deny(missing_docs)]
     24 
     25 use crate::context::{StyleContext, ThreadLocalStyleContext};
     26 use crate::dom::{OpaqueNode, SendNode, TElement};
     27 use crate::scoped_tls::ScopedTLS;
     28 use crate::traversal::{DomTraversal, PerLevelTraversalData};
     29 use std::collections::VecDeque;
     30 
     31 /// The minimum stack size for a thread in the styling pool, in kilobytes.
     32 #[cfg(feature = "gecko")]
     33 pub const STYLE_THREAD_STACK_SIZE_KB: usize = 256;
     34 
     35 /// The minimum stack size for a thread in the styling pool, in kilobytes.
     36 /// Servo requires a bigger stack in debug builds.
     37 #[cfg(feature = "servo")]
     38 pub const STYLE_THREAD_STACK_SIZE_KB: usize = 512;
     39 
     40 /// The stack margin. If we get this deep in the stack, we will skip recursive
     41 /// optimizations to ensure that there is sufficient room for non-recursive work.
     42 ///
     43 /// We allocate large safety margins because certain OS calls can use very large
     44 /// amounts of stack space [1]. Reserving a larger-than-necessary stack costs us
     45 /// address space, but if we keep our safety margin big, we will generally avoid
     46 /// committing those extra pages, and only use them in edge cases that would
     47 /// otherwise cause crashes.
     48 ///
     49 /// When measured with 128KB stacks and 40KB margin, we could support 53
     50 /// levels of recursion before the limiter kicks in, on x86_64-Linux [2]. When
     51 /// we doubled the stack size, we added it all to the safety margin, so we should
     52 /// be able to get the same amount of recursion.
     53 ///
     54 /// [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1395708#c15
     55 /// [2] See Gecko bug 1376883 for more discussion on the measurements.
     56 pub const STACK_SAFETY_MARGIN_KB: usize = 168;
     57 
     58 /// A callback to create our thread local context.  This needs to be
     59 /// out of line so we don't allocate stack space for the entire struct
     60 /// in the caller.
     61 #[inline(never)]
     62 pub(crate) fn create_thread_local_context<'scope, E>(slot: &mut Option<ThreadLocalStyleContext<E>>)
     63 where
     64    E: TElement + 'scope,
     65 {
     66    *slot = Some(ThreadLocalStyleContext::new());
     67 }
     68 
     69 // Sends one chunk of work to the thread-pool.
     70 fn distribute_one_chunk<'a, 'scope, E, D>(
     71    items: VecDeque<SendNode<E::ConcreteNode>>,
     72    traversal_root: OpaqueNode,
     73    work_unit_max: usize,
     74    traversal_data: PerLevelTraversalData,
     75    scope: &'a rayon::ScopeFifo<'scope>,
     76    traversal: &'scope D,
     77    tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>,
     78 ) where
     79    E: TElement + 'scope,
     80    D: DomTraversal<E>,
     81 {
     82    scope.spawn_fifo(move |scope| {
     83        #[cfg(feature = "gecko")]
     84        gecko_profiler_label!(Layout, StyleComputation);
     85        let mut tlc = tls.ensure(create_thread_local_context);
     86        let mut context = StyleContext {
     87            shared: traversal.shared_context(),
     88            thread_local: &mut *tlc,
     89        };
     90        style_trees(
     91            &mut context,
     92            items,
     93            traversal_root,
     94            work_unit_max,
     95            traversal_data,
     96            Some(scope),
     97            traversal,
     98            tls,
     99        );
    100    })
    101 }
    102 
    103 /// Distributes all items into the thread pool, in `work_unit_max` chunks.
    104 fn distribute_work<'a, 'scope, E, D>(
    105    mut items: impl Iterator<Item = SendNode<E::ConcreteNode>>,
    106    traversal_root: OpaqueNode,
    107    work_unit_max: usize,
    108    traversal_data: PerLevelTraversalData,
    109    scope: &'a rayon::ScopeFifo<'scope>,
    110    traversal: &'scope D,
    111    tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>,
    112 ) where
    113    E: TElement + 'scope,
    114    D: DomTraversal<E>,
    115 {
    116    use std::iter::FromIterator;
    117    loop {
    118        let chunk = VecDeque::from_iter(items.by_ref().take(work_unit_max));
    119        if chunk.is_empty() {
    120            return;
    121        }
    122        distribute_one_chunk(
    123            chunk,
    124            traversal_root,
    125            work_unit_max,
    126            traversal_data,
    127            scope,
    128            traversal,
    129            tls,
    130        );
    131    }
    132 }
    133 
    134 /// Processes `discovered` items, possibly spawning work in other threads as needed.
    135 #[inline]
    136 pub fn style_trees<'a, 'scope, E, D>(
    137    context: &mut StyleContext<E>,
    138    mut discovered: VecDeque<SendNode<E::ConcreteNode>>,
    139    traversal_root: OpaqueNode,
    140    work_unit_max: usize,
    141    mut traversal_data: PerLevelTraversalData,
    142    scope: Option<&'a rayon::ScopeFifo<'scope>>,
    143    traversal: &'scope D,
    144    tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>,
    145 ) where
    146    E: TElement + 'scope,
    147    D: DomTraversal<E>,
    148 {
    149    let local_queue_size = if tls.current_thread_index() == 0 {
    150        static_prefs::pref!("layout.css.stylo-local-work-queue.in-main-thread")
    151    } else {
    152        static_prefs::pref!("layout.css.stylo-local-work-queue.in-worker")
    153    } as usize;
    154 
    155    let mut nodes_remaining_at_current_depth = discovered.len();
    156    while let Some(node) = discovered.pop_front() {
    157        let mut children_to_process = 0isize;
    158        traversal.process_preorder(&traversal_data, context, *node, |n| {
    159            children_to_process += 1;
    160            discovered.push_back(unsafe { SendNode::new(n) });
    161        });
    162 
    163        traversal.handle_postorder_traversal(context, traversal_root, *node, children_to_process);
    164 
    165        nodes_remaining_at_current_depth -= 1;
    166 
    167        // If we have enough children at the next depth in the DOM, spawn them to a different job
    168        // relatively soon, while keeping always at least `local_queue_size` worth of work for
    169        // ourselves.
    170        let discovered_children = discovered.len() - nodes_remaining_at_current_depth;
    171        if discovered_children >= work_unit_max
    172            && discovered.len() >= local_queue_size + work_unit_max
    173            && scope.is_some()
    174        {
    175            let kept_work = std::cmp::max(nodes_remaining_at_current_depth, local_queue_size);
    176            let mut traversal_data_copy = traversal_data.clone();
    177            traversal_data_copy.current_dom_depth += 1;
    178            distribute_work(
    179                discovered.range(kept_work..).cloned(),
    180                traversal_root,
    181                work_unit_max,
    182                traversal_data_copy,
    183                scope.unwrap(),
    184                traversal,
    185                tls,
    186            );
    187            discovered.truncate(kept_work);
    188        }
    189 
    190        if nodes_remaining_at_current_depth == 0 {
    191            traversal_data.current_dom_depth += 1;
    192            nodes_remaining_at_current_depth = discovered.len();
    193        }
    194    }
    195 }