commit 8d6d36c1caaca0e12b4c497491f5433fbf4bd063
parent 39d1164545365ded239e78eda1a4a60243454e0b
Author: Denis Palmeiro <dpalmeiro@mozilla.com>
Date: Mon, 8 Dec 2025 14:28:48 +0000
Bug 1997380: Use bloom filter to optimize querySelector attribute matching. r=emilio,firefox-style-system-reviewers
Leverage the element bloom filter to quickly skip subtrees that cannot
contain elements with the queried attributes.
Differential Revision: https://phabricator.services.mozilla.com/D273067
Diffstat:
5 files changed, 201 insertions(+), 34 deletions(-)
diff --git a/layout/style/GeckoBindings.cpp b/layout/style/GeckoBindings.cpp
@@ -1787,6 +1787,10 @@ nsAtom** Gecko_Element_ExportedParts(const nsAttrValue* aValue,
return reinterpret_cast<nsAtom**>(parts->Elements());
}
+uint64_t Gecko_Element_GetSubtreeBloomFilter(const Element* aElement) {
+ return aElement->GetSubtreeBloomFilter();
+}
+
bool StyleSingleFontFamily::IsNamedFamily(const nsAString& aFamilyName) const {
if (!IsFamilyName()) {
return false;
diff --git a/layout/style/GeckoBindings.h b/layout/style/GeckoBindings.h
@@ -109,6 +109,7 @@ const nsINode* Gecko_GetNextStyleChild(mozilla::dom::StyleChildrenIterator*);
nsAtom* Gecko_Element_ImportedPart(const nsAttrValue*, nsAtom*);
nsAtom** Gecko_Element_ExportedParts(const nsAttrValue*, nsAtom*,
size_t* aOutLength);
+uint64_t Gecko_Element_GetSubtreeBloomFilter(const mozilla::dom::Element*);
NS_DECL_THREADSAFE_FFI_REFCOUNTING(mozilla::css::SheetLoadDataHolder,
SheetLoadDataHolder);
diff --git a/servo/components/style/dom.rs b/servo/components/style/dom.rs
@@ -87,6 +87,19 @@ pub struct DomDescendants<N> {
scope: N,
}
+impl<N> DomDescendants<N>
+where
+ N: TNode,
+{
+ /// Returns the next element ignoring all of our subtree.
+ #[inline]
+ pub fn next_skipping_children(&mut self) -> Option<N> {
+ let prev = self.previous.take()?;
+ self.previous = prev.next_in_preorder_skipping_children(self.scope);
+ self.previous
+ }
+}
+
impl<N> Iterator for DomDescendants<N>
where
N: TNode,
@@ -189,7 +202,15 @@ pub trait TNode: Sized + Copy + Clone + Debug + NodeInfo + PartialEq {
if let Some(c) = self.first_child() {
return Some(c);
}
+ self.next_in_preorder_skipping_children(scoped_to)
+ }
+ /// Returns the next node in tree order, skipping the children of the current node.
+ ///
+ /// This is useful when we know that a subtree cannot contain matches, allowing us
+ /// to skip entire subtrees during traversal.
+ #[inline]
+ fn next_in_preorder_skipping_children(&self, scoped_to: Self) -> Option<Self> {
let mut current = *self;
loop {
if current == scoped_to {
@@ -464,6 +485,41 @@ pub trait TElement:
false
}
+ /// Returns the bloom filter for this element's subtree, used for fast
+ /// querySelector optimization by allowing subtrees to be skipped.
+ /// Each element's filter includes hashes for all of it's class names and
+ /// attribute names (not values), along with the names for all descendent
+ /// elements.
+ ///
+ /// The default implementation returns all bits set, meaning the bloom filter
+ /// never filters anything.
+ fn subtree_bloom_filter(&self) -> u64 {
+ u64::MAX
+ }
+
+ /// Check if this element's subtree may contain elements with the given bloom hash.
+ fn bloom_may_have_hash(&self, bloom_hash: u64) -> bool {
+ let bloom = self.subtree_bloom_filter();
+ (bloom & bloom_hash) == bloom_hash
+ }
+
+ /// Convert a 32-bit atom hash to a bloom filter value using k=2 hash functions.
+ /// This must match the C++ implementation of HashForBloomFilter in Element.cpp
+ fn hash_for_bloom_filter(hash: u32) -> u64 {
+ // On 32-bit platforms, we have 31 bits available + 1 tag bit.
+ // On 64-bit platforms, we have 63 bits available + 1 tag bit.
+ #[cfg(target_pointer_width = "32")]
+ const BLOOM_BITS: u32 = 31;
+
+ #[cfg(target_pointer_width = "64")]
+ const BLOOM_BITS: u32 = 63;
+
+ let mut filter = 1u64;
+ filter |= 1u64 << (1 + (hash % BLOOM_BITS));
+ filter |= 1u64 << (1 + ((hash >> 6) % BLOOM_BITS));
+ filter
+ }
+
/// Return the list of slotted nodes of this node.
fn slotted_nodes(&self) -> &[Self::ConcreteNode] {
&[]
diff --git a/servo/components/style/dom_apis.rs b/servo/components/style/dom_apis.rs
@@ -237,26 +237,56 @@ where
fn invalidated_descendants(&mut self, _e: E, _child: E) {}
}
+enum Operation {
+ Reject,
+ Accept,
+ RejectSkippingChildren,
+}
+
+impl From<bool> for Operation {
+ #[inline(always)]
+ fn from(matches: bool) -> Self {
+ if matches {
+ Operation::Accept
+ } else {
+ Operation::Reject
+ }
+ }
+}
+
fn collect_all_elements<E, Q, F>(root: E::ConcreteNode, results: &mut Q::Output, mut filter: F)
where
E: TElement,
Q: SelectorQuery<E>,
- F: FnMut(E) -> bool,
+ F: FnMut(E) -> Operation,
{
- for node in root.dom_descendants() {
+ let mut iter = root.dom_descendants();
+ let mut cur = iter.next();
+ while let Some(node) = cur {
let element = match node.as_element() {
Some(e) => e,
- None => continue,
+ None => {
+ cur = iter.next();
+ continue;
+ },
};
-
- if !filter(element) {
- continue;
- }
-
- Q::append_element(results, element);
- if Q::should_stop_after_first_match() {
- return;
+ match filter(element) {
+ // Element matches - add to results and continue traversing its children.
+ Operation::Accept => {
+ Q::append_element(results, element);
+ if Q::should_stop_after_first_match() {
+ return;
+ }
+ },
+ // Element doesn't match - skip it but continue traversing its children.
+ Operation::Reject => {},
+ // Element doesn't match and skip entire subtree.
+ Operation::RejectSkippingChildren => {
+ cur = iter.next_skipping_children();
+ continue;
+ },
}
+ cur = iter.next();
}
}
@@ -344,7 +374,7 @@ fn collect_elements_with_id<E, Q, F>(
Ok(elements) => elements,
Err(()) => {
collect_all_elements::<E, Q, _>(root, results, |e| {
- e.has_id(id, class_and_id_case_sensitivity) && filter(e)
+ Operation::from(e.has_id(id, class_and_id_case_sensitivity) && filter(e))
});
return;
@@ -449,26 +479,68 @@ where
{
match *component {
Component::ExplicitUniversalType => {
- collect_all_elements::<E, Q, _>(root, results, |_| true)
+ collect_all_elements::<E, Q, _>(root, results, |_| Operation::Accept)
+ },
+ Component::Class(ref class) => {
+ // Bloom filter can only be used when case sensitive.
+ let bloom_hash = if class_and_id_case_sensitivity == CaseSensitivity::CaseSensitive {
+ Some(E::hash_for_bloom_filter(class.0.get_hash()))
+ } else {
+ None
+ };
+
+ collect_all_elements::<E, Q, _>(root, results, |element| {
+ if bloom_hash.is_some_and(|hash| !element.bloom_may_have_hash(hash)) {
+ return Operation::RejectSkippingChildren;
+ }
+ Operation::from(element.has_class(class, class_and_id_case_sensitivity))
+ });
},
- Component::Class(ref class) => collect_all_elements::<E, Q, _>(root, results, |element| {
- element.has_class(class, class_and_id_case_sensitivity)
- }),
Component::LocalName(ref local_name) => {
collect_all_elements::<E, Q, _>(root, results, |element| {
- local_name_matches(element, local_name)
+ Operation::from(local_name_matches(element, local_name))
})
},
Component::AttributeInNoNamespaceExists {
ref local_name,
ref local_name_lower,
- } => collect_all_elements::<E, Q, _>(root, results, |element| {
- element.has_attr_in_no_namespace(matching::select_name(
- &element,
- local_name,
- local_name_lower,
- ))
- }),
+ } => {
+ // For HTML elements: C++ hashes lowercase
+ // For XUL/SVG/MathML elements: C++ hashes original case
+ let hash_original = E::hash_for_bloom_filter(local_name.0.get_hash());
+ let hash_lower = if local_name.0 == local_name_lower.0 {
+ hash_original
+ } else {
+ E::hash_for_bloom_filter(local_name_lower.0.get_hash())
+ };
+
+ collect_all_elements::<E, Q, _>(root, results, |element| {
+ // Check bloom filter first
+ let bloom_found_hash = if hash_original == hash_lower
+ || !element.as_node().owner_doc().is_html_document()
+ {
+ element.bloom_may_have_hash(hash_original)
+ } else if element.is_html_element_in_html_document() {
+ // HTML elements store lowercase hashes
+ element.bloom_may_have_hash(hash_lower)
+ } else {
+ // Non-HTML elements in HTML documents might have HTML descendants
+ // with lowercase-only hashes, so check both
+ element.bloom_may_have_hash(hash_original)
+ || element.bloom_may_have_hash(hash_lower)
+ };
+
+ if !bloom_found_hash {
+ return Operation::RejectSkippingChildren;
+ }
+
+ Operation::from(element.has_attr_in_no_namespace(matching::select_name(
+ &element,
+ local_name,
+ local_name_lower,
+ )))
+ });
+ },
Component::AttributeInNoNamespace {
ref local_name,
ref value,
@@ -477,8 +549,15 @@ where
} => {
let empty_namespace = selectors::parser::namespace_empty_string::<E::Impl>();
let namespace_constraint = NamespaceConstraint::Specific(&empty_namespace);
+
+ // Only use bloom filter to check for attribute name existence.
+ let bloom_hash = E::hash_for_bloom_filter(local_name.0.get_hash());
+
collect_all_elements::<E, Q, _>(root, results, |element| {
- element.attr_matches(
+ if !element.bloom_may_have_hash(bloom_hash) {
+ return Operation::RejectSkippingChildren;
+ }
+ Operation::from(element.attr_matches(
&namespace_constraint,
local_name,
&AttrSelectorOperation::WithValue {
@@ -489,8 +568,8 @@ where
),
value,
},
- )
- })
+ ))
+ });
},
ref other => {
let id = match get_id(other) {
@@ -693,20 +772,38 @@ where
match simple_filter {
SimpleFilter::Class(ref class) => {
collect_all_elements::<E, Q, _>(root, results, |element| {
- element.has_class(class, class_and_id_case_sensitivity)
- && matching::matches_selector_list(selector_list, &element, matching_context)
+ Operation::from(
+ element.has_class(class, class_and_id_case_sensitivity)
+ && matching::matches_selector_list(
+ selector_list,
+ &element,
+ matching_context,
+ ),
+ )
});
},
SimpleFilter::LocalName(ref local_name) => {
collect_all_elements::<E, Q, _>(root, results, |element| {
- local_name_matches(element, local_name)
- && matching::matches_selector_list(selector_list, &element, matching_context)
+ Operation::from(
+ local_name_matches(element, local_name)
+ && matching::matches_selector_list(
+ selector_list,
+ &element,
+ matching_context,
+ ),
+ )
});
},
SimpleFilter::Attr(ref local_name) => {
collect_all_elements::<E, Q, _>(root, results, |element| {
- has_attr(element, local_name)
- && matching::matches_selector_list(selector_list, &element, matching_context)
+ Operation::from(
+ has_attr(element, local_name)
+ && matching::matches_selector_list(
+ selector_list,
+ &element,
+ matching_context,
+ ),
+ )
});
},
}
@@ -725,7 +822,11 @@ fn query_selector_slow<E, Q>(
Q: SelectorQuery<E>,
{
collect_all_elements::<E, Q, _>(root, results, |element| {
- matching::matches_selector_list(selector_list, &element, matching_context)
+ Operation::from(matching::matches_selector_list(
+ selector_list,
+ &element,
+ matching_context,
+ ))
});
}
diff --git a/servo/components/style/gecko/wrapper.rs b/servo/components/style/gecko/wrapper.rs
@@ -1103,6 +1103,11 @@ impl<'le> TElement for GeckoElement<'le> {
}
#[inline]
+ fn subtree_bloom_filter(&self) -> u64 {
+ unsafe { bindings::Gecko_Element_GetSubtreeBloomFilter(self.0) }
+ }
+
+ #[inline]
fn local_name(&self) -> &WeakAtom {
unsafe { WeakAtom::new(self.as_node().node_info().mInner.mName) }
}