tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

builder.rs (16711B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
      4 
      5 //! Helper module to build up a selector safely and efficiently.
      6 //!
      7 //! Our selector representation is designed to optimize matching, and has
      8 //! several requirements:
      9 //! * All simple selectors and combinators are stored inline in the same buffer as Component
     10 //!   instances.
     11 //! * We store the top-level compound selectors from right to left, i.e. in matching order.
     12 //! * We store the simple selectors for each combinator from left to right, so that we match the
     13 //!   cheaper simple selectors first.
     14 //!
     15 //! For example, the selector:
     16 //!
     17 //!   .bar:hover > .baz:nth-child(2) + .qux
     18 //!
     19 //! Gets stored as:
     20 //!
     21 //!   [.qux,  + , .baz, :nth-child(2),  > , .bar, :hover]
     22 //!
     23 //! Meeting all these constraints without extra memmove traffic during parsing is non-trivial. This
     24 //! module encapsulates those details and presents an easy-to-use API for the parser.
     25 
     26 use crate::parser::{
     27    Combinator, Component, ParseRelative, RelativeSelector, Selector, SelectorData, SelectorImpl,
     28 };
     29 use crate::sink::Push;
     30 use bitflags::bitflags;
     31 use derive_more::{Add, AddAssign};
     32 use servo_arc::Arc;
     33 use smallvec::SmallVec;
     34 use std::cmp;
     35 use std::slice;
     36 
     37 #[cfg(feature = "to_shmem")]
     38 use to_shmem_derive::ToShmem;
     39 
     40 /// Top-level SelectorBuilder struct. This should be stack-allocated by the consumer and never
     41 /// moved (because it contains a lot of inline data that would be slow to memmove).
     42 ///
     43 /// After instantiation, callers may call the push_simple_selector() and push_combinator() methods
     44 /// to append selector data as it is encountered (from left to right). Once the process is
     45 /// complete, callers should invoke build(), which transforms the contents of the SelectorBuilder
     46 /// into a heap- allocated Selector and leaves the builder in a drained state.
     47 #[derive(Debug)]
     48 pub struct SelectorBuilder<Impl: SelectorImpl> {
     49    /// The entire sequence of components. We make this large because the result of parsing a
     50    /// selector is fed into a new Arc-ed allocation, so any spilled vec would be a wasted
     51    /// allocation. Also, Components are large enough that we don't have much cache locality
     52    /// benefit from reserving stack space for fewer of them.
     53    components: SmallVec<[Component<Impl>; 32]>,
     54    last_compound_start: Option<usize>,
     55 }
     56 
     57 impl<Impl: SelectorImpl> Push<Component<Impl>> for SelectorBuilder<Impl> {
     58    fn push(&mut self, value: Component<Impl>) {
     59        self.push_simple_selector(value);
     60    }
     61 }
     62 
     63 impl<Impl: SelectorImpl> SelectorBuilder<Impl> {
     64    /// Pushes a simple selector onto the current compound selector.
     65    #[inline(always)]
     66    pub fn push_simple_selector(&mut self, ss: Component<Impl>) {
     67        debug_assert!(!ss.is_combinator());
     68        self.components.push(ss);
     69    }
     70 
     71    /// Completes the current compound selector and starts a new one, delimited by the given
     72    /// combinator.
     73    #[inline(always)]
     74    pub fn push_combinator(&mut self, c: Combinator) {
     75        self.reverse_last_compound();
     76        self.components.push(Component::Combinator(c));
     77        self.last_compound_start = Some(self.components.len());
     78    }
     79 
     80    fn reverse_last_compound(&mut self) {
     81        let start = self.last_compound_start.unwrap_or(0);
     82        self.components[start..].reverse();
     83    }
     84 
     85    /// Returns true if combinators have ever been pushed to this builder.
     86    #[inline(always)]
     87    pub fn has_combinators(&self) -> bool {
     88        self.last_compound_start.is_some()
     89    }
     90 
     91    /// Consumes the builder, producing a Selector.
     92    #[inline(always)]
     93    pub fn build(&mut self, parse_relative: ParseRelative) -> SelectorData<Impl> {
     94        // Compute the specificity and flags.
     95        let sf = specificity_and_flags(
     96            self.components.iter(),
     97            /* for_nesting_parent = */ false,
     98        );
     99        self.build_with_specificity_and_flags(sf, parse_relative)
    100    }
    101 
    102    /// Builds with an explicit SpecificityAndFlags. This is separated from build() so that unit
    103    /// tests can pass an explicit specificity.
    104    #[inline(always)]
    105    pub(crate) fn build_with_specificity_and_flags(
    106        &mut self,
    107        mut spec: SpecificityAndFlags,
    108        parse_relative: ParseRelative,
    109    ) -> SelectorData<Impl> {
    110        let implicit_addition = match parse_relative {
    111            ParseRelative::ForNesting if !spec.flags.intersects(SelectorFlags::HAS_PARENT) => {
    112                Some((Component::ParentSelector, SelectorFlags::HAS_PARENT))
    113            },
    114            ParseRelative::ForScope
    115                if !spec
    116                    .flags
    117                    .intersects(SelectorFlags::HAS_SCOPE | SelectorFlags::HAS_PARENT) =>
    118            {
    119                Some((Component::ImplicitScope, SelectorFlags::HAS_SCOPE))
    120            },
    121            _ => None,
    122        };
    123        let implicit_selector_and_combinator;
    124        let implicit_selector = if let Some((component, flag)) = implicit_addition {
    125            spec.flags.insert(flag);
    126            implicit_selector_and_combinator =
    127                [Component::Combinator(Combinator::Descendant), component];
    128            &implicit_selector_and_combinator[..]
    129        } else {
    130            &[]
    131        };
    132 
    133        // As an optimization, for a selector without combinators, we can just keep the order
    134        // as-is.
    135        if self.last_compound_start.is_none() {
    136            return Arc::from_header_and_iter(
    137                spec,
    138                ExactChain(self.components.drain(..), implicit_selector.iter().cloned()),
    139            );
    140        }
    141 
    142        self.reverse_last_compound();
    143        Arc::from_header_and_iter(
    144            spec,
    145            ExactChain(
    146                self.components.drain(..).rev(),
    147                implicit_selector.iter().cloned(),
    148            ),
    149        )
    150    }
    151 }
    152 
    153 impl<Impl: SelectorImpl> Default for SelectorBuilder<Impl> {
    154    #[inline(always)]
    155    fn default() -> Self {
    156        SelectorBuilder {
    157            components: SmallVec::new(),
    158            last_compound_start: None,
    159        }
    160    }
    161 }
    162 
    163 // This is effectively a Chain<>, but Chain isn't an ExactSizeIterator, see
    164 // https://github.com/rust-lang/rust/issues/34433
    165 struct ExactChain<A, B>(A, B);
    166 
    167 impl<A, B, Item> ExactSizeIterator for ExactChain<A, B>
    168 where
    169    A: ExactSizeIterator<Item = Item>,
    170    B: ExactSizeIterator<Item = Item>,
    171 {
    172    fn len(&self) -> usize {
    173        self.0.len() + self.1.len()
    174    }
    175 }
    176 
    177 impl<A, B, Item> Iterator for ExactChain<A, B>
    178 where
    179    A: ExactSizeIterator<Item = Item>,
    180    B: ExactSizeIterator<Item = Item>,
    181 {
    182    type Item = Item;
    183 
    184    #[inline(always)]
    185    fn next(&mut self) -> Option<Self::Item> {
    186        self.0.next().or_else(|| self.1.next())
    187    }
    188 
    189    fn size_hint(&self) -> (usize, Option<usize>) {
    190        let len = self.len();
    191        (len, Some(len))
    192    }
    193 }
    194 
    195 /// Flags that indicate at which point of parsing a selector are we.
    196 #[derive(Clone, Copy, Default, Eq, PartialEq)]
    197 #[cfg_attr(feature = "to_shmem", derive(ToShmem))]
    198 pub(crate) struct SelectorFlags(u8);
    199 
    200 bitflags! {
    201    impl SelectorFlags: u8 {
    202        const HAS_PSEUDO = 1 << 0;
    203        const HAS_SLOTTED = 1 << 1;
    204        const HAS_PART = 1 << 2;
    205        const HAS_PARENT = 1 << 3;
    206        const HAS_HOST = 1 << 4;
    207        const HAS_SCOPE = 1 << 5;
    208    }
    209 }
    210 
    211 impl core::fmt::Debug for SelectorFlags {
    212    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
    213        if self.is_empty() {
    214            write!(f, "{:#x}", Self::empty().bits())
    215        } else {
    216            bitflags::parser::to_writer(self, f)
    217        }
    218    }
    219 }
    220 
    221 impl SelectorFlags {
    222    /// When you nest a pseudo-element with something like:
    223    ///
    224    ///   ::before { & { .. } }
    225    ///
    226    /// It is not supposed to work, because :is(::before) is invalid. We can't propagate the
    227    /// pseudo-flags from inner to outer selectors, to avoid breaking our invariants.
    228    pub(crate) fn forbidden_for_nesting() -> Self {
    229        Self::HAS_PSEUDO | Self::HAS_SLOTTED | Self::HAS_PART
    230    }
    231 }
    232 
    233 #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
    234 #[cfg_attr(feature = "to_shmem", derive(ToShmem))]
    235 pub struct SpecificityAndFlags {
    236    /// There are two free bits here, since we use ten bits for each specificity
    237    /// kind (id, class, element).
    238    pub(crate) specificity: u32,
    239    /// There's padding after this field due to the size of the flags.
    240    pub(crate) flags: SelectorFlags,
    241 }
    242 
    243 const MAX_10BIT: u32 = (1u32 << 10) - 1;
    244 
    245 #[derive(Add, AddAssign, Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd)]
    246 pub(crate) struct Specificity {
    247    id_selectors: u32,
    248    class_like_selectors: u32,
    249    element_selectors: u32,
    250 }
    251 
    252 impl Specificity {
    253    // Return the specficity of a single class-like selector.
    254    #[inline]
    255    pub fn single_class_like() -> Self {
    256        Specificity {
    257            id_selectors: 0,
    258            class_like_selectors: 1,
    259            element_selectors: 0,
    260        }
    261    }
    262 }
    263 
    264 impl From<u32> for Specificity {
    265    #[inline]
    266    fn from(value: u32) -> Specificity {
    267        assert!(value <= MAX_10BIT << 20 | MAX_10BIT << 10 | MAX_10BIT);
    268        Specificity {
    269            id_selectors: value >> 20,
    270            class_like_selectors: (value >> 10) & MAX_10BIT,
    271            element_selectors: value & MAX_10BIT,
    272        }
    273    }
    274 }
    275 
    276 impl From<Specificity> for u32 {
    277    #[inline]
    278    fn from(specificity: Specificity) -> u32 {
    279        cmp::min(specificity.id_selectors, MAX_10BIT) << 20
    280            | cmp::min(specificity.class_like_selectors, MAX_10BIT) << 10
    281            | cmp::min(specificity.element_selectors, MAX_10BIT)
    282    }
    283 }
    284 
    285 fn specificity_and_flags<Impl>(
    286    iter: slice::Iter<Component<Impl>>,
    287    for_nesting_parent: bool,
    288 ) -> SpecificityAndFlags
    289 where
    290    Impl: SelectorImpl,
    291 {
    292    fn component_specificity<Impl>(
    293        simple_selector: &Component<Impl>,
    294        specificity: &mut Specificity,
    295        flags: &mut SelectorFlags,
    296        for_nesting_parent: bool,
    297    ) where
    298        Impl: SelectorImpl,
    299    {
    300        match *simple_selector {
    301            Component::Combinator(..) => {},
    302            Component::ParentSelector => flags.insert(SelectorFlags::HAS_PARENT),
    303            Component::Part(..) => {
    304                flags.insert(SelectorFlags::HAS_PART);
    305                if !for_nesting_parent {
    306                    specificity.element_selectors += 1
    307                }
    308            },
    309            Component::PseudoElement(ref pseudo) => {
    310                use crate::parser::PseudoElement;
    311                flags.insert(SelectorFlags::HAS_PSEUDO);
    312                if !for_nesting_parent {
    313                    specificity.element_selectors += pseudo.specificity_count();
    314                }
    315            },
    316            Component::LocalName(..) => specificity.element_selectors += 1,
    317            Component::Slotted(ref selector) => {
    318                flags.insert(SelectorFlags::HAS_SLOTTED);
    319                if !for_nesting_parent {
    320                    specificity.element_selectors += 1;
    321                    // Note that due to the way ::slotted works we only compete with
    322                    // other ::slotted rules, so the above rule doesn't really
    323                    // matter, but we do it still for consistency with other
    324                    // pseudo-elements.
    325                    //
    326                    // See: https://github.com/w3c/csswg-drafts/issues/1915
    327                    *specificity += Specificity::from(selector.specificity());
    328                }
    329                flags.insert(selector.flags());
    330            },
    331            Component::Host(ref selector) => {
    332                flags.insert(SelectorFlags::HAS_HOST);
    333                specificity.class_like_selectors += 1;
    334                if let Some(ref selector) = *selector {
    335                    // See: https://github.com/w3c/csswg-drafts/issues/1915
    336                    *specificity += Specificity::from(selector.specificity());
    337                    flags.insert(selector.flags());
    338                }
    339            },
    340            Component::ID(..) => {
    341                specificity.id_selectors += 1;
    342            },
    343            Component::Class(..)
    344            | Component::AttributeInNoNamespace { .. }
    345            | Component::AttributeInNoNamespaceExists { .. }
    346            | Component::AttributeOther(..)
    347            | Component::Root
    348            | Component::Empty
    349            | Component::Nth(..)
    350            | Component::NonTSPseudoClass(..) => {
    351                specificity.class_like_selectors += 1;
    352            },
    353            Component::Scope | Component::ImplicitScope => {
    354                flags.insert(SelectorFlags::HAS_SCOPE);
    355                if matches!(*simple_selector, Component::Scope) {
    356                    specificity.class_like_selectors += 1;
    357                }
    358            },
    359            Component::NthOf(ref nth_of_data) => {
    360                // https://drafts.csswg.org/selectors/#specificity-rules:
    361                //
    362                //     The specificity of the :nth-last-child() pseudo-class,
    363                //     like the :nth-child() pseudo-class, combines the
    364                //     specificity of a regular pseudo-class with that of its
    365                //     selector argument S.
    366                specificity.class_like_selectors += 1;
    367                let sf = selector_list_specificity_and_flags(
    368                    nth_of_data.selectors().iter(),
    369                    for_nesting_parent,
    370                );
    371                *specificity += Specificity::from(sf.specificity);
    372                flags.insert(sf.flags);
    373            },
    374            // https://drafts.csswg.org/selectors/#specificity-rules:
    375            //
    376            //     The specificity of an :is(), :not(), or :has() pseudo-class
    377            //     is replaced by the specificity of the most specific complex
    378            //     selector in its selector list argument.
    379            Component::Where(ref list)
    380            | Component::Negation(ref list)
    381            | Component::Is(ref list) => {
    382                let sf = selector_list_specificity_and_flags(
    383                    list.slice().iter(),
    384                    /* nested = */ true,
    385                );
    386                if !matches!(*simple_selector, Component::Where(..)) {
    387                    *specificity += Specificity::from(sf.specificity);
    388                }
    389                flags.insert(sf.flags);
    390            },
    391            Component::Has(ref relative_selectors) => {
    392                let sf = relative_selector_list_specificity_and_flags(
    393                    relative_selectors,
    394                    for_nesting_parent,
    395                );
    396                *specificity += Specificity::from(sf.specificity);
    397                flags.insert(sf.flags);
    398            },
    399            Component::ExplicitUniversalType
    400            | Component::ExplicitAnyNamespace
    401            | Component::ExplicitNoNamespace
    402            | Component::DefaultNamespace(..)
    403            | Component::Namespace(..)
    404            | Component::RelativeSelectorAnchor
    405            | Component::Invalid(..) => {
    406                // Does not affect specificity
    407            },
    408        }
    409    }
    410 
    411    let mut specificity = Default::default();
    412    let mut flags = Default::default();
    413    for simple_selector in iter {
    414        component_specificity(
    415            &simple_selector,
    416            &mut specificity,
    417            &mut flags,
    418            for_nesting_parent,
    419        );
    420    }
    421    SpecificityAndFlags {
    422        specificity: specificity.into(),
    423        flags,
    424    }
    425 }
    426 
    427 /// Finds the maximum specificity of elements in the list and returns it.
    428 pub(crate) fn selector_list_specificity_and_flags<'a, Impl: SelectorImpl>(
    429    itr: impl Iterator<Item = &'a Selector<Impl>>,
    430    for_nesting_parent: bool,
    431 ) -> SpecificityAndFlags {
    432    let mut specificity = 0;
    433    let mut flags = SelectorFlags::empty();
    434    for selector in itr {
    435        let selector_flags = selector.flags();
    436        let selector_specificity = if for_nesting_parent
    437            && selector_flags.intersects(SelectorFlags::forbidden_for_nesting())
    438        {
    439            // In this case we need to re-compute the specificity.
    440            specificity_and_flags(selector.iter_raw_match_order(), for_nesting_parent).specificity
    441        } else {
    442            selector.specificity()
    443        };
    444        specificity = std::cmp::max(specificity, selector_specificity);
    445        flags.insert(selector.flags());
    446    }
    447    SpecificityAndFlags { specificity, flags }
    448 }
    449 
    450 pub(crate) fn relative_selector_list_specificity_and_flags<Impl: SelectorImpl>(
    451    list: &[RelativeSelector<Impl>],
    452    for_nesting_parent: bool,
    453 ) -> SpecificityAndFlags {
    454    selector_list_specificity_and_flags(list.iter().map(|rel| &rel.selector), for_nesting_parent)
    455 }