tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bloom.rs (14443B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 //! The style bloom filter is used as an optimization when matching deep
      6 //! descendant selectors.
      7 
      8 #![deny(missing_docs)]
      9 
     10 use crate::dom::{SendElement, TElement};
     11 use crate::LocalName;
     12 use atomic_refcell::{AtomicRefCell, AtomicRefMut};
     13 use selectors::bloom::BloomFilter;
     14 use smallvec::SmallVec;
     15 
     16 thread_local! {
     17    /// Bloom filters are large allocations, so we store them in thread-local storage
     18    /// such that they can be reused across style traversals. StyleBloom is responsible
     19    /// for ensuring that the bloom filter is zeroed when it is dropped.
     20    ///
     21    /// We intentionally leak this from TLS because we don't have the guarantee
     22    /// of TLS destructors to run in worker threads.
     23    ///
     24    /// Also, leaking it guarantees that we can borrow it indefinitely.
     25    ///
     26    /// We could change this once https://github.com/rayon-rs/rayon/issues/688
     27    /// is fixed, hopefully, which point we'd need to change the filter member below to be an
     28    /// arc and carry an owning reference around or so.
     29    static BLOOM_KEY: &'static AtomicRefCell<BloomFilter> = Box::leak(Default::default());
     30 }
     31 
     32 /// A struct that allows us to fast-reject deep descendant selectors avoiding
     33 /// selector-matching.
     34 ///
     35 /// This is implemented using a counting bloom filter, and it's a standard
     36 /// optimization. See Gecko's `AncestorFilter`, and Blink's and WebKit's
     37 /// `SelectorFilter`.
     38 ///
     39 /// The constraints for Servo's style system are a bit different compared to
     40 /// traditional style systems given Servo does a parallel breadth-first
     41 /// traversal instead of a sequential depth-first traversal.
     42 ///
     43 /// This implies that we need to track a bit more state than other browsers to
     44 /// ensure we're doing the correct thing during the traversal, and being able to
     45 /// apply this optimization effectively.
     46 ///
     47 /// Concretely, we have a bloom filter instance per worker thread, and we track
     48 /// the current DOM depth in order to find a common ancestor when it doesn't
     49 /// match the previous element we've styled.
     50 ///
     51 /// This is usually a pretty fast operation (we use to be one level deeper than
     52 /// the previous one), but in the case of work-stealing, we may needed to push
     53 /// and pop multiple elements.
     54 ///
     55 /// See the `insert_parents_recovering`, where most of the magic happens.
     56 ///
     57 /// Regarding thread-safety, this struct is safe because:
     58 ///
     59 ///  * We clear this after a restyle.
     60 ///  * The DOM shape and attributes (and every other thing we access here) are
     61 ///    immutable during a restyle.
     62 ///
     63 pub struct StyleBloom<E: TElement> {
     64    /// A handle to the bloom filter from the thread upon which this StyleBloom
     65    /// was created. We use AtomicRefCell so that this is all |Send|, which allows
     66    /// StyleBloom to live in ThreadLocalStyleContext, which is dropped from the
     67    /// parent thread.
     68    filter: AtomicRefMut<'static, BloomFilter>,
     69 
     70    /// The stack of elements that this bloom filter contains, along with the
     71    /// number of hashes pushed for each element.
     72    elements: SmallVec<[PushedElement<E>; 16]>,
     73 
     74    /// Stack of hashes that have been pushed onto this filter.
     75    pushed_hashes: SmallVec<[u32; 64]>,
     76 }
     77 
     78 /// The very rough benchmarks in the selectors crate show clear()
     79 /// costing about 25 times more than remove_hash(). We use this to implement
     80 /// clear() more efficiently when only a small number of hashes have been
     81 /// pushed.
     82 ///
     83 /// One subtly to note is that remove_hash() will not touch the value
     84 /// if the filter overflowed. However, overflow can only occur if we
     85 /// get 255 collisions on the same hash value, and 25 < 255.
     86 const MEMSET_CLEAR_THRESHOLD: usize = 25;
     87 
     88 struct PushedElement<E: TElement> {
     89    /// The element that was pushed.
     90    element: SendElement<E>,
     91 
     92    /// The number of hashes pushed for the element.
     93    num_hashes: usize,
     94 }
     95 
     96 impl<E: TElement> PushedElement<E> {
     97    fn new(el: E, num_hashes: usize) -> Self {
     98        PushedElement {
     99            element: unsafe { SendElement::new(el) },
    100            num_hashes,
    101        }
    102    }
    103 }
    104 
    105 /// Returns whether the attribute name is excluded from the bloom filter.
    106 ///
    107 /// We do this for attributes that are very common but not commonly used in
    108 /// selectors.
    109 #[inline]
    110 pub fn is_attr_name_excluded_from_filter(name: &LocalName) -> bool {
    111    *name == local_name!("class") || *name == local_name!("id") || *name == local_name!("style")
    112 }
    113 
    114 /// Gather all relevant hash for fast-reject filters from an element.
    115 pub fn each_relevant_element_hash<E, F>(element: E, mut f: F)
    116 where
    117    E: TElement,
    118    F: FnMut(u32),
    119 {
    120    f(element.local_name().get_hash());
    121    f(element.namespace().get_hash());
    122 
    123    if let Some(id) = element.id() {
    124        f(id.get_hash());
    125    }
    126 
    127    element.each_class(|class| f(class.get_hash()));
    128 
    129    element.each_attr_name(|name| {
    130        if !is_attr_name_excluded_from_filter(name) {
    131            f(name.get_hash())
    132        }
    133    });
    134 }
    135 
    136 impl<E: TElement> Drop for StyleBloom<E> {
    137    fn drop(&mut self) {
    138        // Leave the reusable bloom filter in a zeroed state.
    139        self.clear();
    140    }
    141 }
    142 
    143 impl<E: TElement> StyleBloom<E> {
    144    /// Create an empty `StyleBloom`. Because StyleBloom acquires the thread-
    145    /// local filter buffer, creating multiple live StyleBloom instances at
    146    /// the same time on the same thread will panic.
    147 
    148    // Forced out of line to limit stack frame sizes after extra inlining from
    149    // https://github.com/rust-lang/rust/pull/43931
    150    //
    151    // See https://github.com/servo/servo/pull/18420#issuecomment-328769322
    152    #[inline(never)]
    153    pub fn new() -> Self {
    154        let filter = BLOOM_KEY.with(|b| b.borrow_mut());
    155        debug_assert!(
    156            filter.is_zeroed(),
    157            "Forgot to zero the bloom filter last time"
    158        );
    159        StyleBloom {
    160            filter,
    161            elements: Default::default(),
    162            pushed_hashes: Default::default(),
    163        }
    164    }
    165 
    166    /// Return the bloom filter used properly by the `selectors` crate.
    167    pub fn filter(&self) -> &BloomFilter {
    168        &*self.filter
    169    }
    170 
    171    /// Push an element to the bloom filter, knowing that it's a child of the
    172    /// last element parent.
    173    pub fn push(&mut self, element: E) {
    174        if cfg!(debug_assertions) {
    175            if self.elements.is_empty() {
    176                assert!(element.traversal_parent().is_none());
    177            }
    178        }
    179        self.push_internal(element);
    180    }
    181 
    182    /// Same as `push`, but without asserting, in order to use it from
    183    /// `rebuild`.
    184    fn push_internal(&mut self, element: E) {
    185        let mut count = 0;
    186        each_relevant_element_hash(element, |hash| {
    187            count += 1;
    188            self.filter.insert_hash(hash);
    189            self.pushed_hashes.push(hash);
    190        });
    191        self.elements.push(PushedElement::new(element, count));
    192    }
    193 
    194    /// Pop the last element in the bloom filter and return it.
    195    #[inline]
    196    fn pop(&mut self) -> Option<E> {
    197        let PushedElement {
    198            element,
    199            num_hashes,
    200        } = self.elements.pop()?;
    201        let popped_element = *element;
    202 
    203        // Verify that the pushed hashes match the ones we'd get from the element.
    204        let mut expected_hashes = vec![];
    205        if cfg!(debug_assertions) {
    206            each_relevant_element_hash(popped_element, |hash| expected_hashes.push(hash));
    207        }
    208 
    209        for _ in 0..num_hashes {
    210            let hash = self.pushed_hashes.pop().unwrap();
    211            debug_assert_eq!(expected_hashes.pop().unwrap(), hash);
    212            self.filter.remove_hash(hash);
    213        }
    214 
    215        Some(popped_element)
    216    }
    217 
    218    /// Returns the DOM depth of elements that can be correctly
    219    /// matched against the bloom filter (that is, the number of
    220    /// elements in our list).
    221    pub fn matching_depth(&self) -> usize {
    222        self.elements.len()
    223    }
    224 
    225    /// Clears the bloom filter.
    226    pub fn clear(&mut self) {
    227        self.elements.clear();
    228 
    229        if self.pushed_hashes.len() > MEMSET_CLEAR_THRESHOLD {
    230            self.filter.clear();
    231            self.pushed_hashes.clear();
    232        } else {
    233            for hash in self.pushed_hashes.drain(..) {
    234                self.filter.remove_hash(hash);
    235            }
    236            debug_assert!(self.filter.is_zeroed());
    237        }
    238    }
    239 
    240    /// Rebuilds the bloom filter up to the parent of the given element.
    241    pub fn rebuild(&mut self, mut element: E) {
    242        self.clear();
    243 
    244        let mut parents_to_insert = SmallVec::<[E; 16]>::new();
    245        while let Some(parent) = element.traversal_parent() {
    246            parents_to_insert.push(parent);
    247            element = parent;
    248        }
    249 
    250        for parent in parents_to_insert.drain(..).rev() {
    251            self.push(parent);
    252        }
    253    }
    254 
    255    /// In debug builds, asserts that all the parents of `element` are in the
    256    /// bloom filter.
    257    ///
    258    /// Goes away in release builds.
    259    pub fn assert_complete(&self, mut element: E) {
    260        if cfg!(debug_assertions) {
    261            let mut checked = 0;
    262            while let Some(parent) = element.traversal_parent() {
    263                assert_eq!(
    264                    parent,
    265                    *(self.elements[self.elements.len() - 1 - checked].element)
    266                );
    267                element = parent;
    268                checked += 1;
    269            }
    270            assert_eq!(checked, self.elements.len());
    271        }
    272    }
    273 
    274    /// Get the element that represents the chain of things inserted
    275    /// into the filter right now.  That chain is the given element
    276    /// (if any) and its ancestors.
    277    #[inline]
    278    pub fn current_parent(&self) -> Option<E> {
    279        self.elements.last().map(|ref el| *el.element)
    280    }
    281 
    282    /// Insert the parents of an element in the bloom filter, trying to recover
    283    /// the filter if the last element inserted doesn't match.
    284    ///
    285    /// Gets the element depth in the dom, to make it efficient, or if not
    286    /// provided always rebuilds the filter from scratch.
    287    ///
    288    /// Returns the new bloom filter depth, that the traversal code is
    289    /// responsible to keep around if it wants to get an effective filter.
    290    pub fn insert_parents_recovering(&mut self, element: E, element_depth: usize) {
    291        // Easy case, we're in a different restyle, or we're empty.
    292        if self.elements.is_empty() {
    293            self.rebuild(element);
    294            return;
    295        }
    296 
    297        let traversal_parent = match element.traversal_parent() {
    298            Some(parent) => parent,
    299            None => {
    300                // Yay, another easy case.
    301                self.clear();
    302                return;
    303            },
    304        };
    305 
    306        if self.current_parent() == Some(traversal_parent) {
    307            // Ta da, cache hit, we're all done.
    308            return;
    309        }
    310 
    311        if element_depth == 0 {
    312            self.clear();
    313            return;
    314        }
    315 
    316        // We should've early exited above.
    317        debug_assert!(
    318            element_depth != 0,
    319            "We should have already cleared the bloom filter"
    320        );
    321        debug_assert!(!self.elements.is_empty(), "How! We should've just rebuilt!");
    322 
    323        // Now the fun begins: We have the depth of the dom and the depth of the
    324        // last element inserted in the filter, let's try to find a common
    325        // parent.
    326        //
    327        // The current depth, that is, the depth of the last element inserted in
    328        // the bloom filter, is the number of elements _minus one_, that is: if
    329        // there's one element, it must be the root -> depth zero.
    330        let mut current_depth = self.elements.len() - 1;
    331 
    332        // If the filter represents an element too deep in the dom, we need to
    333        // pop ancestors.
    334        while current_depth > element_depth - 1 {
    335            self.pop().expect("Emilio is bad at math");
    336            current_depth -= 1;
    337        }
    338 
    339        // Now let's try to find a common parent in the bloom filter chain,
    340        // starting with traversal_parent.
    341        let mut common_parent = traversal_parent;
    342        let mut common_parent_depth = element_depth - 1;
    343 
    344        // Let's collect the parents we are going to need to insert once we've
    345        // found the common one.
    346        let mut parents_to_insert = SmallVec::<[E; 16]>::new();
    347 
    348        // If the bloom filter still doesn't have enough elements, the common
    349        // parent is up in the dom.
    350        while common_parent_depth > current_depth {
    351            // TODO(emilio): Seems like we could insert parents here, then
    352            // reverse the slice.
    353            parents_to_insert.push(common_parent);
    354            common_parent = common_parent.traversal_parent().expect("We were lied to");
    355            common_parent_depth -= 1;
    356        }
    357 
    358        // Now the two depths are the same.
    359        debug_assert_eq!(common_parent_depth, current_depth);
    360 
    361        // Happy case: The parents match, we only need to push the ancestors
    362        // we've collected and we'll never enter in this loop.
    363        //
    364        // Not-so-happy case: Parent's don't match, so we need to keep going up
    365        // until we find a common ancestor.
    366        //
    367        // Gecko currently models native anonymous content that conceptually
    368        // hangs off the document (such as scrollbars) as a separate subtree
    369        // from the document root.
    370        //
    371        // Thus it's possible with Gecko that we do not find any common
    372        // ancestor.
    373        while *(self.elements.last().unwrap().element) != common_parent {
    374            parents_to_insert.push(common_parent);
    375            self.pop().unwrap();
    376            common_parent = match common_parent.traversal_parent() {
    377                Some(parent) => parent,
    378                None => {
    379                    debug_assert!(self.elements.is_empty());
    380                    if cfg!(feature = "gecko") {
    381                        break;
    382                    } else {
    383                        panic!("should have found a common ancestor");
    384                    }
    385                },
    386            }
    387        }
    388 
    389        // Now the parents match, so insert the stack of elements we have been
    390        // collecting so far.
    391        for parent in parents_to_insert.drain(..).rev() {
    392            self.push(parent);
    393        }
    394 
    395        debug_assert_eq!(self.elements.len(), element_depth);
    396 
    397        // We're done! Easy.
    398    }
    399 }