parallel.rs (7531B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

//! Implements parallel traversal over the DOM tree.
//!
//! This traversal is based on Rayon, and therefore its safety is largely
//! verified by the type system.
//!
//! The primary trickiness and fine print for the above relates to the
//! thread safety of the DOM nodes themselves. Accessing a DOM element
//! concurrently on multiple threads is actually mostly "safe", since all
//! the mutable state is protected by an AtomicRefCell, and so we'll
//! generally panic if something goes wrong. Still, we try to to enforce our
//! thread invariants at compile time whenever possible. As such, TNode and
//! TElement are not Send, so ordinary style system code cannot accidentally
//! share them with other threads. In the parallel traversal, we explicitly
//! invoke |unsafe { SendNode::new(n) }| to put nodes in containers that may
//! be sent to other threads. This occurs in only a handful of places and is
//! easy to grep for. At the time of this writing, there is no other unsafe
//! code in the parallel traversal.

#![deny(missing_docs)]

use crate::context::{StyleContext, ThreadLocalStyleContext};
use crate::dom::{OpaqueNode, SendNode, TElement};
use crate::scoped_tls::ScopedTLS;
use crate::traversal::{DomTraversal, PerLevelTraversalData};
use std::collections::VecDeque;

/// The minimum stack size for a thread in the styling pool, in kilobytes.
#[cfg(feature = "gecko")]
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 256;

/// The minimum stack size for a thread in the styling pool, in kilobytes.
/// Servo requires a bigger stack in debug builds.
#[cfg(feature = "servo")]
pub const STYLE_THREAD_STACK_SIZE_KB: usize = 512;

/// The stack margin. If we get this deep in the stack, we will skip recursive
/// optimizations to ensure that there is sufficient room for non-recursive work.
///
/// We allocate large safety margins because certain OS calls can use very large
/// amounts of stack space [1]. Reserving a larger-than-necessary stack costs us
/// address space, but if we keep our safety margin big, we will generally avoid
/// committing those extra pages, and only use them in edge cases that would
/// otherwise cause crashes.
///
/// When measured with 128KB stacks and 40KB margin, we could support 53
/// levels of recursion before the limiter kicks in, on x86_64-Linux [2]. When
/// we doubled the stack size, we added it all to the safety margin, so we should
/// be able to get the same amount of recursion.
///
/// [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1395708#c15
/// [2] See Gecko bug 1376883 for more discussion on the measurements.
pub const STACK_SAFETY_MARGIN_KB: usize = 168;

/// A callback to create our thread local context. This needs to be
/// out of line so we don't allocate stack space for the entire struct
/// in the caller.
///
/// Fills `slot` with a freshly-constructed `ThreadLocalStyleContext`; it is
/// passed to `ScopedTLS::ensure` so each pool thread initializes its slot
/// lazily, on first use.
#[inline(never)]
pub(crate) fn create_thread_local_context<'scope, E>(slot: &mut Option<ThreadLocalStyleContext<E>>)
where
    E: TElement + 'scope,
{
    *slot = Some(ThreadLocalStyleContext::new());
}

// Sends one chunk of work to the thread-pool.
70 fn distribute_one_chunk<'a, 'scope, E, D>( 71 items: VecDeque<SendNode<E::ConcreteNode>>, 72 traversal_root: OpaqueNode, 73 work_unit_max: usize, 74 traversal_data: PerLevelTraversalData, 75 scope: &'a rayon::ScopeFifo<'scope>, 76 traversal: &'scope D, 77 tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>, 78 ) where 79 E: TElement + 'scope, 80 D: DomTraversal<E>, 81 { 82 scope.spawn_fifo(move |scope| { 83 #[cfg(feature = "gecko")] 84 gecko_profiler_label!(Layout, StyleComputation); 85 let mut tlc = tls.ensure(create_thread_local_context); 86 let mut context = StyleContext { 87 shared: traversal.shared_context(), 88 thread_local: &mut *tlc, 89 }; 90 style_trees( 91 &mut context, 92 items, 93 traversal_root, 94 work_unit_max, 95 traversal_data, 96 Some(scope), 97 traversal, 98 tls, 99 ); 100 }) 101 } 102 103 /// Distributes all items into the thread pool, in `work_unit_max` chunks. 104 fn distribute_work<'a, 'scope, E, D>( 105 mut items: impl Iterator<Item = SendNode<E::ConcreteNode>>, 106 traversal_root: OpaqueNode, 107 work_unit_max: usize, 108 traversal_data: PerLevelTraversalData, 109 scope: &'a rayon::ScopeFifo<'scope>, 110 traversal: &'scope D, 111 tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>, 112 ) where 113 E: TElement + 'scope, 114 D: DomTraversal<E>, 115 { 116 use std::iter::FromIterator; 117 loop { 118 let chunk = VecDeque::from_iter(items.by_ref().take(work_unit_max)); 119 if chunk.is_empty() { 120 return; 121 } 122 distribute_one_chunk( 123 chunk, 124 traversal_root, 125 work_unit_max, 126 traversal_data, 127 scope, 128 traversal, 129 tls, 130 ); 131 } 132 } 133 134 /// Processes `discovered` items, possibly spawning work in other threads as needed. 
135 #[inline] 136 pub fn style_trees<'a, 'scope, E, D>( 137 context: &mut StyleContext<E>, 138 mut discovered: VecDeque<SendNode<E::ConcreteNode>>, 139 traversal_root: OpaqueNode, 140 work_unit_max: usize, 141 mut traversal_data: PerLevelTraversalData, 142 scope: Option<&'a rayon::ScopeFifo<'scope>>, 143 traversal: &'scope D, 144 tls: &'scope ScopedTLS<'scope, ThreadLocalStyleContext<E>>, 145 ) where 146 E: TElement + 'scope, 147 D: DomTraversal<E>, 148 { 149 let local_queue_size = if tls.current_thread_index() == 0 { 150 static_prefs::pref!("layout.css.stylo-local-work-queue.in-main-thread") 151 } else { 152 static_prefs::pref!("layout.css.stylo-local-work-queue.in-worker") 153 } as usize; 154 155 let mut nodes_remaining_at_current_depth = discovered.len(); 156 while let Some(node) = discovered.pop_front() { 157 let mut children_to_process = 0isize; 158 traversal.process_preorder(&traversal_data, context, *node, |n| { 159 children_to_process += 1; 160 discovered.push_back(unsafe { SendNode::new(n) }); 161 }); 162 163 traversal.handle_postorder_traversal(context, traversal_root, *node, children_to_process); 164 165 nodes_remaining_at_current_depth -= 1; 166 167 // If we have enough children at the next depth in the DOM, spawn them to a different job 168 // relatively soon, while keeping always at least `local_queue_size` worth of work for 169 // ourselves. 
170 let discovered_children = discovered.len() - nodes_remaining_at_current_depth; 171 if discovered_children >= work_unit_max 172 && discovered.len() >= local_queue_size + work_unit_max 173 && scope.is_some() 174 { 175 let kept_work = std::cmp::max(nodes_remaining_at_current_depth, local_queue_size); 176 let mut traversal_data_copy = traversal_data.clone(); 177 traversal_data_copy.current_dom_depth += 1; 178 distribute_work( 179 discovered.range(kept_work..).cloned(), 180 traversal_root, 181 work_unit_max, 182 traversal_data_copy, 183 scope.unwrap(), 184 traversal, 185 tls, 186 ); 187 discovered.truncate(kept_work); 188 } 189 190 if nodes_remaining_at_current_depth == 0 { 191 traversal_data.current_dom_depth += 1; 192 nodes_remaining_at_current_depth = discovered.len(); 193 } 194 } 195 }