tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 8d6d36c1caaca0e12b4c497491f5433fbf4bd063
parent 39d1164545365ded239e78eda1a4a60243454e0b
Author: Denis Palmeiro <dpalmeiro@mozilla.com>
Date:   Mon,  8 Dec 2025 14:28:48 +0000

Bug 1997380: Use bloom filter to optimize querySelector attribute matching. r=emilio,firefox-style-system-reviewers

Leverage the element bloom filter to quickly skip subtrees that cannot
contain elements with the queried attributes.

Differential Revision: https://phabricator.services.mozilla.com/D273067

Diffstat:
Mlayout/style/GeckoBindings.cpp | 4++++
Mlayout/style/GeckoBindings.h | 1+
Mservo/components/style/dom.rs | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mservo/components/style/dom_apis.rs | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Mservo/components/style/gecko/wrapper.rs | 5+++++
5 files changed, 201 insertions(+), 34 deletions(-)

diff --git a/layout/style/GeckoBindings.cpp b/layout/style/GeckoBindings.cpp @@ -1787,6 +1787,10 @@ nsAtom** Gecko_Element_ExportedParts(const nsAttrValue* aValue, return reinterpret_cast<nsAtom**>(parts->Elements()); } +uint64_t Gecko_Element_GetSubtreeBloomFilter(const Element* aElement) { + return aElement->GetSubtreeBloomFilter(); +} + bool StyleSingleFontFamily::IsNamedFamily(const nsAString& aFamilyName) const { if (!IsFamilyName()) { return false; diff --git a/layout/style/GeckoBindings.h b/layout/style/GeckoBindings.h @@ -109,6 +109,7 @@ const nsINode* Gecko_GetNextStyleChild(mozilla::dom::StyleChildrenIterator*); nsAtom* Gecko_Element_ImportedPart(const nsAttrValue*, nsAtom*); nsAtom** Gecko_Element_ExportedParts(const nsAttrValue*, nsAtom*, size_t* aOutLength); +uint64_t Gecko_Element_GetSubtreeBloomFilter(const mozilla::dom::Element*); NS_DECL_THREADSAFE_FFI_REFCOUNTING(mozilla::css::SheetLoadDataHolder, SheetLoadDataHolder); diff --git a/servo/components/style/dom.rs b/servo/components/style/dom.rs @@ -87,6 +87,19 @@ pub struct DomDescendants<N> { scope: N, } +impl<N> DomDescendants<N> +where + N: TNode, +{ + /// Returns the next element ignoring all of our subtree. + #[inline] + pub fn next_skipping_children(&mut self) -> Option<N> { + let prev = self.previous.take()?; + self.previous = prev.next_in_preorder_skipping_children(self.scope); + self.previous + } +} + impl<N> Iterator for DomDescendants<N> where N: TNode, @@ -189,7 +202,15 @@ pub trait TNode: Sized + Copy + Clone + Debug + NodeInfo + PartialEq { if let Some(c) = self.first_child() { return Some(c); } + self.next_in_preorder_skipping_children(scoped_to) + } + /// Returns the next node in tree order, skipping the children of the current node. + /// + /// This is useful when we know that a subtree cannot contain matches, allowing us + /// to skip entire subtrees during traversal. + #[inline] + fn next_in_preorder_skipping_children(&self, scoped_to: Self) -> Option<Self> { let mut current = *self; loop { if current == scoped_to { @@ -464,6 +485,41 @@ pub trait TElement: false } + /// Returns the bloom filter for this element's subtree, used for fast + /// querySelector optimization by allowing subtrees to be skipped. + /// Each element's filter includes hashes for all of it's class names and + /// attribute names (not values), along with the names for all descendent + /// elements. + /// + /// The default implementation returns all bits set, meaning the bloom filter + /// never filters anything. + fn subtree_bloom_filter(&self) -> u64 { + u64::MAX + } + + /// Check if this element's subtree may contain elements with the given bloom hash. + fn bloom_may_have_hash(&self, bloom_hash: u64) -> bool { + let bloom = self.subtree_bloom_filter(); + (bloom & bloom_hash) == bloom_hash + } + + /// Convert a 32-bit atom hash to a bloom filter value using k=2 hash functions. + /// This must match the C++ implementation of HashForBloomFilter in Element.cpp + fn hash_for_bloom_filter(hash: u32) -> u64 { + // On 32-bit platforms, we have 31 bits available + 1 tag bit. + // On 64-bit platforms, we have 63 bits available + 1 tag bit. + #[cfg(target_pointer_width = "32")] + const BLOOM_BITS: u32 = 31; + + #[cfg(target_pointer_width = "64")] + const BLOOM_BITS: u32 = 63; + + let mut filter = 1u64; + filter |= 1u64 << (1 + (hash % BLOOM_BITS)); + filter |= 1u64 << (1 + ((hash >> 6) % BLOOM_BITS)); + filter + } + /// Return the list of slotted nodes of this node. fn slotted_nodes(&self) -> &[Self::ConcreteNode] { &[] diff --git a/servo/components/style/dom_apis.rs b/servo/components/style/dom_apis.rs @@ -237,26 +237,56 @@ where fn invalidated_descendants(&mut self, _e: E, _child: E) {} } +enum Operation { + Reject, + Accept, + RejectSkippingChildren, +} + +impl From<bool> for Operation { + #[inline(always)] + fn from(matches: bool) -> Self { + if matches { + Operation::Accept + } else { + Operation::Reject + } + } +} + fn collect_all_elements<E, Q, F>(root: E::ConcreteNode, results: &mut Q::Output, mut filter: F) where E: TElement, Q: SelectorQuery<E>, - F: FnMut(E) -> bool, + F: FnMut(E) -> Operation, { - for node in root.dom_descendants() { + let mut iter = root.dom_descendants(); + let mut cur = iter.next(); + while let Some(node) = cur { let element = match node.as_element() { Some(e) => e, - None => continue, + None => { + cur = iter.next(); + continue; + }, }; - - if !filter(element) { - continue; - } - - Q::append_element(results, element); - if Q::should_stop_after_first_match() { - return; + match filter(element) { + // Element matches - add to results and continue traversing its children. + Operation::Accept => { + Q::append_element(results, element); + if Q::should_stop_after_first_match() { + return; + } + }, + // Element doesn't match - skip it but continue traversing its children. + Operation::Reject => {}, + // Element doesn't match and skip entire subtree. + Operation::RejectSkippingChildren => { + cur = iter.next_skipping_children(); + continue; + }, } + cur = iter.next(); } } @@ -344,7 +374,7 @@ fn collect_elements_with_id<E, Q, F>( Ok(elements) => elements, Err(()) => { collect_all_elements::<E, Q, _>(root, results, |e| { - e.has_id(id, class_and_id_case_sensitivity) && filter(e) + Operation::from(e.has_id(id, class_and_id_case_sensitivity) && filter(e)) }); return; @@ -449,26 +479,68 @@ where { match *component { Component::ExplicitUniversalType => { - collect_all_elements::<E, Q, _>(root, results, |_| true) + collect_all_elements::<E, Q, _>(root, results, |_| Operation::Accept) + }, + Component::Class(ref class) => { + // Bloom filter can only be used when case sensitive. + let bloom_hash = if class_and_id_case_sensitivity == CaseSensitivity::CaseSensitive { + Some(E::hash_for_bloom_filter(class.0.get_hash())) + } else { + None + }; + + collect_all_elements::<E, Q, _>(root, results, |element| { + if bloom_hash.is_some_and(|hash| !element.bloom_may_have_hash(hash)) { + return Operation::RejectSkippingChildren; + } + Operation::from(element.has_class(class, class_and_id_case_sensitivity)) + }); }, - Component::Class(ref class) => collect_all_elements::<E, Q, _>(root, results, |element| { - element.has_class(class, class_and_id_case_sensitivity) - }), Component::LocalName(ref local_name) => { collect_all_elements::<E, Q, _>(root, results, |element| { - local_name_matches(element, local_name) + Operation::from(local_name_matches(element, local_name)) }) }, Component::AttributeInNoNamespaceExists { ref local_name, ref local_name_lower, - } => collect_all_elements::<E, Q, _>(root, results, |element| { - element.has_attr_in_no_namespace(matching::select_name( - &element, - local_name, - local_name_lower, - )) - }), + } => { + // For HTML elements: C++ hashes lowercase + // For XUL/SVG/MathML elements: C++ hashes original case + let hash_original = E::hash_for_bloom_filter(local_name.0.get_hash()); + let hash_lower = if local_name.0 == local_name_lower.0 { + hash_original + } else { + E::hash_for_bloom_filter(local_name_lower.0.get_hash()) + }; + + collect_all_elements::<E, Q, _>(root, results, |element| { + // Check bloom filter first + let bloom_found_hash = if hash_original == hash_lower + || !element.as_node().owner_doc().is_html_document() + { + element.bloom_may_have_hash(hash_original) + } else if element.is_html_element_in_html_document() { + // HTML elements store lowercase hashes + element.bloom_may_have_hash(hash_lower) + } else { + // Non-HTML elements in HTML documents might have HTML descendants + // with lowercase-only hashes, so check both + element.bloom_may_have_hash(hash_original) + || element.bloom_may_have_hash(hash_lower) + }; + + if !bloom_found_hash { + return Operation::RejectSkippingChildren; + } + + Operation::from(element.has_attr_in_no_namespace(matching::select_name( + &element, + local_name, + local_name_lower, + ))) + }); + }, Component::AttributeInNoNamespace { ref local_name, ref value, @@ -477,8 +549,15 @@ where } => { let empty_namespace = selectors::parser::namespace_empty_string::<E::Impl>(); let namespace_constraint = NamespaceConstraint::Specific(&empty_namespace); + + // Only use bloom filter to check for attribute name existence. + let bloom_hash = E::hash_for_bloom_filter(local_name.0.get_hash()); + collect_all_elements::<E, Q, _>(root, results, |element| { - element.attr_matches( + if !element.bloom_may_have_hash(bloom_hash) { + return Operation::RejectSkippingChildren; + } + Operation::from(element.attr_matches( &namespace_constraint, local_name, &AttrSelectorOperation::WithValue { @@ -489,8 +568,8 @@ where ), value, }, - ) - }) + )) + }); }, ref other => { let id = match get_id(other) { @@ -693,20 +772,38 @@ where match simple_filter { SimpleFilter::Class(ref class) => { collect_all_elements::<E, Q, _>(root, results, |element| { - element.has_class(class, class_and_id_case_sensitivity) - && matching::matches_selector_list(selector_list, &element, matching_context) + Operation::from( + element.has_class(class, class_and_id_case_sensitivity) + && matching::matches_selector_list( + selector_list, + &element, + matching_context, + ), + ) }); }, SimpleFilter::LocalName(ref local_name) => { collect_all_elements::<E, Q, _>(root, results, |element| { - local_name_matches(element, local_name) - && matching::matches_selector_list(selector_list, &element, matching_context) + Operation::from( + local_name_matches(element, local_name) + && matching::matches_selector_list( + selector_list, + &element, + matching_context, + ), + ) }); }, SimpleFilter::Attr(ref local_name) => { collect_all_elements::<E, Q, _>(root, results, |element| { - has_attr(element, local_name) - && matching::matches_selector_list(selector_list, &element, matching_context) + Operation::from( + has_attr(element, local_name) + && matching::matches_selector_list( + selector_list, + &element, + matching_context, + ), + ) }); }, } @@ -725,7 +822,11 @@ fn query_selector_slow<E, Q>( Q: SelectorQuery<E>, { collect_all_elements::<E, Q, _>(root, results, |element| { - matching::matches_selector_list(selector_list, &element, matching_context) + Operation::from(matching::matches_selector_list( + selector_list, + &element, + matching_context, + )) }); } diff --git a/servo/components/style/gecko/wrapper.rs b/servo/components/style/gecko/wrapper.rs @@ -1103,6 +1103,11 @@ impl<'le> TElement for GeckoElement<'le> { } #[inline] + fn subtree_bloom_filter(&self) -> u64 { + unsafe { bindings::Gecko_Element_GetSubtreeBloomFilter(self.0) } + } + + #[inline] fn local_name(&self) -> &WeakAtom { unsafe { WeakAtom::new(self.as_node().node_info().mInner.mName) } }