commit 3e56e39ac2a5433d87b059353981c00ccf5ba788
parent 538ed294babe504cf5e08528c99b339519288831
Author: Nicolas Silva <nical@fastmail.com>
Date: Tue, 9 Dec 2025 08:19:08 +0000
Bug 1892201 - Add support for caching quad render tasks. r=gw
And use it for conic gradients with SWGL.
This takes a simplified approach for the cache key, only considering the item UID and up to 3 clips.
As a result the cache key is small and independent from the primitive kind. The downside is that it cannot deduplicate identical items within a frame.
Differential Revision: https://phabricator.services.mozilla.com/D275218
Diffstat:
4 files changed, 181 insertions(+), 53 deletions(-)
diff --git a/gfx/wr/webrender/src/pattern.rs b/gfx/wr/webrender/src/pattern.rs
@@ -2,9 +2,14 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{units::DeviceRect, ColorF};
-
-use crate::{clip::ClipStore, frame_builder::FrameBuilderConfig, render_task_graph::{RenderTaskGraphBuilder, RenderTaskId}, renderer::GpuBufferBuilder, scene::SceneProperties, spatial_tree::SpatialTree};
+use api::{ColorF, units::DeviceRect};
+
+use crate::clip::ClipStore;
+use crate::frame_builder::FrameBuilderConfig;
+use crate::render_task_graph::{RenderTaskGraphBuilder, RenderTaskId};
+use crate::renderer::GpuBufferBuilder;
+use crate::scene::SceneProperties;
+use crate::spatial_tree::SpatialTree;
#[repr(u32)]
#[cfg_attr(feature = "capture", derive(Serialize))]
diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs
@@ -364,6 +364,7 @@ fn prepare_interned_prim_for_render(
prim_data,
&prim_data.kind.outer_shadow_rect,
prim_instance_index,
+ &None,
prim_spatial_node_index,
&prim_instance.vis.clip_chain,
device_pixel_scale,
@@ -666,6 +667,7 @@ fn prepare_interned_prim_for_render(
prim_data,
&prim_data.common.prim_rect,
prim_instance_index,
+ &None,
prim_spatial_node_index,
&prim_instance.vis.clip_chain,
device_pixel_scale,
@@ -746,6 +748,7 @@ fn prepare_interned_prim_for_render(
prim_data.stretch_size,
prim_data.tile_spacing,
prim_instance_index,
+ &None,
prim_spatial_node_index,
&prim_instance.vis.clip_chain,
device_pixel_scale,
@@ -858,6 +861,7 @@ fn prepare_interned_prim_for_render(
prim_data.stretch_size,
prim_data.tile_spacing,
prim_instance_index,
+ &None,
prim_spatial_node_index,
&prim_instance.vis.clip_chain,
device_pixel_scale,
@@ -905,12 +909,46 @@ fn prepare_interned_prim_for_render(
let prim_data = &mut data_stores.conic_grad[*data_handle];
if !*use_legacy_path {
+ // Conic gradients are quite slow with SWGL, so we want to cache
+ // them as much as we can, even large ones.
+ // TODO: get_surface_rect is not always cheap. We should reorganize
+ // the code so that we only call it as much as we really need it,
+ // while avoiding this much boilerplate for each primitive that uses
+ // caching.
+ let mut should_cache = frame_context.fb_config.is_software;
+ if should_cache {
+ let surface = &frame_state.surfaces[pic_context.surface_index.0];
+ let clipped_surface_rect = surface.get_surface_rect(
+ &prim_instance.vis.clip_chain.pic_coverage_rect,
+ frame_context.spatial_tree,
+ );
+
+ should_cache = if let Some(rect) = clipped_surface_rect {
+ rect.width() < 4096 && rect.height() < 4096
+ } else {
+ false
+ };
+ }
+
+ let cache_key = if should_cache {
+ quad::cache_key(
+ data_handle.uid(),
+ prim_spatial_node_index,
+ &prim_instance.vis.clip_chain,
+ frame_state.clip_store,
+ &data_stores.clip,
+ )
+ } else {
+ None
+ };
+
quad::prepare_repeatable_quad(
prim_data,
&prim_data.common.prim_rect,
prim_data.stretch_size,
prim_data.tile_spacing,
prim_instance_index,
+ &cache_key,
prim_spatial_node_index,
&prim_instance.vis.clip_chain,
device_pixel_scale,
diff --git a/gfx/wr/webrender/src/quad.rs b/gfx/wr/webrender/src/quad.rs
@@ -5,6 +5,7 @@
use api::{units::*, ClipMode, ColorF};
use euclid::point2;
+use crate::ItemUid;
use crate::batch::{BatchKey, BatchKind, BatchTextures};
use crate::clip::{ClipChainInstance, ClipIntern, ClipItemKind, ClipNodeRange, ClipSpaceConversion, ClipStore};
use crate::command_buffer::{CommandBufferIndex, PrimitiveCommand, QuadFlags};
@@ -15,8 +16,10 @@ use crate::internal_types::TextureSource;
use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState, PatternKind, PatternShaderInput};
use crate::prim_store::{PrimitiveInstanceIndex, PrimitiveScratchBuffer};
use crate::render_task::{MaskSubPass, RenderTask, RenderTaskAddress, RenderTaskKind, SubPass};
+use crate::render_task_cache::{RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskParent};
use crate::render_task_graph::{RenderTaskGraph, RenderTaskGraphBuilder, RenderTaskId};
use crate::renderer::{BlendMode, GpuBufferAddress, GpuBufferBuilder, GpuBufferBuilderF, GpuBufferDataI};
+use crate::resource_cache::ResourceCache;
use crate::segment::EdgeAaSegmentMask;
use crate::space::SpaceMapper;
use crate::spatial_tree::{CoordinateSpaceMapping, SpatialNodeIndex, SpatialTree};
@@ -32,6 +35,15 @@ const MIN_AA_SEGMENTS_SIZE: f32 = 4.0;
const MIN_QUAD_SPLIT_SIZE: f32 = 256.0;
const MAX_TILES_PER_QUAD: usize = 4;
+
+/// Key identifying a cached quad render task.
+///
+/// Built by `cache_key` in this file and wrapped in
+/// `RenderTaskCacheKeyKind::Quad`. It only stores interned item UIDs, so it
+/// does not depend on the kind of primitive being rendered.
+#[derive(Clone, Debug, Hash, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct QuadCacheKey {
+ /// UID of the primitive item, as returned by `ItemUid::get_uid`.
+ pub prim: u64,
+ /// UIDs of the clips in the primitive's clip chain, in clip chain order.
+ /// Slots that are not used by a clip hold `!0`.
+ pub clips: [u64; 3],
+}
+
/// Describes how clipping affects the rendering of a quad primitive.
///
/// As a general rule, parts of the quad that require masking are prerendered in an
@@ -68,6 +80,7 @@ pub fn prepare_quad(
pattern_builder: &dyn PatternBuilder,
local_rect: &LayoutRect,
prim_instance_index: PrimitiveInstanceIndex,
+ cache_key: &Option<QuadCacheKey>,
prim_spatial_node_index: SpatialNodeIndex,
clip_chain: &ClipChainInstance,
device_pixel_scale: DevicePixelScale,
@@ -111,14 +124,17 @@ pub fn prepare_quad(
let can_use_nine_patch = map_prim_to_raster.is_2d_scale_translation()
&& pattern_builder.can_use_nine_patch();
- let strategy = get_prim_render_strategy(
- prim_spatial_node_index,
- clip_chain,
- frame_state.clip_store,
- interned_clips,
- can_use_nine_patch,
- pattern_ctx.spatial_tree,
- );
+ let strategy = match cache_key {
+ Some(_) => QuadRenderStrategy::Indirect,
+ None => get_prim_render_strategy(
+ prim_spatial_node_index,
+ clip_chain,
+ frame_state.clip_store,
+ interned_clips,
+ can_use_nine_patch,
+ pattern_ctx.spatial_tree,
+ ),
+ };
prepare_quad_impl(
strategy,
@@ -126,6 +142,7 @@ pub fn prepare_quad(
shared_pattern.as_ref(),
local_rect,
prim_instance_index,
+ cache_key,
prim_spatial_node_index,
clip_chain,
device_pixel_scale,
@@ -148,6 +165,7 @@ pub fn prepare_repeatable_quad(
stretch_size: LayoutSize,
tile_spacing: LayoutSize,
prim_instance_index: PrimitiveInstanceIndex,
+ cache_key: &Option<QuadCacheKey>,
prim_spatial_node_index: SpatialNodeIndex,
clip_chain: &ClipChainInstance,
device_pixel_scale: DevicePixelScale,
@@ -193,14 +211,17 @@ pub fn prepare_repeatable_quad(
// coverage rect into account rather than the whole primitive's, but
// for now it does the latter so we might as well not do the work
// multiple times.
- let strategy = get_prim_render_strategy(
- prim_spatial_node_index,
- clip_chain,
- frame_state.clip_store,
- interned_clips,
- can_use_nine_patch,
- pattern_ctx.spatial_tree,
- );
+ let strategy = match cache_key {
+ Some(_) => QuadRenderStrategy::Indirect,
+ None => get_prim_render_strategy(
+ prim_spatial_node_index,
+ clip_chain,
+ frame_state.clip_store,
+ interned_clips,
+ can_use_nine_patch,
+ pattern_ctx.spatial_tree,
+ ),
+ };
let needs_repetition = stretch_size.width < local_rect.width()
|| stretch_size.height < local_rect.height();
@@ -213,6 +234,7 @@ pub fn prepare_repeatable_quad(
shared_pattern.as_ref(),
local_rect,
prim_instance_index,
+ &cache_key,
prim_spatial_node_index,
clip_chain,
device_pixel_scale,
@@ -251,6 +273,7 @@ pub fn prepare_repeatable_quad(
shared_pattern.as_ref(),
&tile_rect,
prim_instance_index,
+ &cache_key,
prim_spatial_node_index,
clip_chain,
device_pixel_scale,
@@ -272,6 +295,7 @@ fn prepare_quad_impl(
shared_pattern: Option<&Pattern>,
local_rect: &LayoutRect,
prim_instance_index: PrimitiveInstanceIndex,
+ cache_key: &Option<QuadCacheKey>,
prim_spatial_node_index: SpatialNodeIndex,
clip_chain: &ClipChainInstance,
device_pixel_scale: DevicePixelScale,
@@ -376,6 +400,7 @@ fn prepare_quad_impl(
}
let surface = &mut frame_state.surfaces[pic_context.surface_index.0];
+
let Some(clipped_surface_rect) = surface.get_surface_rect(
&clip_chain.pic_coverage_rect, ctx.spatial_tree
) else {
@@ -407,6 +432,13 @@ fn prepare_quad_impl(
ScaleOffset::identity(),
);
+ let cache_key = cache_key.as_ref().map(|key| {
+ RenderTaskCacheKey {
+ size: clipped_surface_rect.size(),
+ kind: RenderTaskCacheKeyKind::Quad(key.clone()),
+ }
+ });
+
// Render the primitive as a single instance in a render task, apply a mask
// and composite it in the current picture.
// The coordinates are provided to the shaders:
@@ -425,7 +457,10 @@ fn prepare_quad_impl(
quad_flags,
device_pixel_scale,
needs_scissor,
+ cache_key.as_ref(),
+ frame_state.resource_cache,
frame_state.rg_builder,
+ &mut frame_state.frame_gpu_data.f32,
&mut frame_state.surface_builder,
);
@@ -677,7 +712,10 @@ fn prepare_quad_impl(
quad_flags,
device_pixel_scale,
needs_scissor,
+ None,
+ frame_state.resource_cache,
state.rg_builder,
+ &mut state.frame_gpu_data.f32,
&mut frame_state.surface_builder,
);
@@ -854,7 +892,10 @@ fn prepare_quad_impl(
quad_flags,
device_pixel_scale,
false,
+ None,
+ frame_state.resource_cache,
state.rg_builder,
+ &mut state.frame_gpu_data.f32,
&mut frame_state.surface_builder,
);
scratch.quad_indirect_segments.push(QuadSegment {
@@ -976,6 +1017,36 @@ fn get_prim_render_strategy(
}
}
+/// Builds the key used to look up a quad's render task in the render task
+/// cache.
+///
+/// The key is made of the primitive's UID and the UIDs of the clips in
+/// `clip_chain`, stored in a fixed array of `CACHE_MAX_CLIPS` entries. Slots
+/// that are not used by a clip keep the `!0` sentinel.
+///
+/// Returns `None` (meaning the primitive should not be cached) when:
+/// - the clip chain contains more clips than the key can store, or
+/// - the `spatial_node_index` of any clip differs from
+///   `prim_spatial_node_index`.
+pub fn cache_key(
+    prim_uid: ItemUid,
+    prim_spatial_node_index: SpatialNodeIndex,
+    clip_chain: &ClipChainInstance,
+    clip_store: &ClipStore,
+    interned_clips: &DataStore<ClipIntern>,
+) -> Option<QuadCacheKey> {
+    const CACHE_MAX_CLIPS: usize = 3;
+
+    // The key has room for exactly `CACHE_MAX_CLIPS` clip UIDs, so only give
+    // up when there are strictly more clips than that. Using `>=` here would
+    // needlessly refuse to cache primitives that have exactly three clips.
+    if (clip_chain.clips_range.count as usize) > CACHE_MAX_CLIPS {
+        return None;
+    }
+
+    // `!0` marks the slots that are not used by any clip.
+    let mut clip_uids = [!0; CACHE_MAX_CLIPS];
+
+    for i in 0 .. clip_chain.clips_range.count {
+        let clip_instance = clip_store.get_instance_from_range(&clip_chain.clips_range, i);
+        clip_uids[i as usize] = clip_instance.handle.uid().get_uid();
+        let clip_node = &interned_clips[clip_instance.handle];
+        // NOTE(review): presumably clips in another spatial node are rejected
+        // because the key does not capture their transform relative to the
+        // primitive - confirm.
+        if clip_node.item.spatial_node_index != prim_spatial_node_index {
+            return None;
+        }
+    }
+
+    Some(QuadCacheKey {
+        prim: prim_uid.get_uid(),
+        clips: clip_uids,
+    })
+}
+
fn add_render_task_with_mask(
pattern: &Pattern,
task_size: DeviceIntSize,
@@ -989,46 +1060,58 @@ fn add_render_task_with_mask(
quad_flags: QuadFlags,
device_pixel_scale: DevicePixelScale,
needs_scissor_rect: bool,
+ cache_key: Option<&RenderTaskCacheKey>,
+ resource_cache: &mut ResourceCache,
rg_builder: &mut RenderTaskGraphBuilder,
+ gpu_buffer: &mut GpuBufferBuilderF,
surface_builder: &mut SurfaceBuilder,
) -> RenderTaskId {
- let task_id = rg_builder.add().init(RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_prim(
- pattern.kind,
- pattern.shader_input,
- raster_spatial_node_index,
- device_pixel_scale,
- content_origin,
- prim_address_f,
- transform_id,
- aa_flags,
- quad_flags,
- needs_scissor_rect,
- pattern.texture_input.task_id,
- ),
- ));
-
- // If the pattern samples from a texture, add it as a dependency
- // of the indirect render task that relies on it.
- if pattern.texture_input.task_id != RenderTaskId::INVALID {
- rg_builder.add_dependency(task_id, pattern.texture_input.task_id);
- }
-
- if clips_range.count > 0 {
- let masks = MaskSubPass {
- clip_node_range: clips_range,
- prim_spatial_node_index,
- prim_address_f,
- };
+ let is_opaque = pattern.is_opaque && clips_range.count == 0;
+ resource_cache.request_render_task(
+ cache_key.cloned(),
+ is_opaque,
+ RenderTaskParent::Surface,
+ gpu_buffer,
+ rg_builder,
+ surface_builder,
+ &mut|rg_builder, _| {
+ let task_id = rg_builder.add().init(RenderTask::new_dynamic(
+ task_size,
+ RenderTaskKind::new_prim(
+ pattern.kind,
+ pattern.shader_input,
+ raster_spatial_node_index,
+ device_pixel_scale,
+ content_origin,
+ prim_address_f,
+ transform_id,
+ aa_flags,
+ quad_flags,
+ needs_scissor_rect,
+ pattern.texture_input.task_id,
+ ),
+ ));
+
+ // If the pattern samples from a texture, add it as a dependency
+ // of the indirect render task that relies on it.
+ if pattern.texture_input.task_id != RenderTaskId::INVALID {
+ rg_builder.add_dependency(task_id, pattern.texture_input.task_id);
+ }
- let task = rg_builder.get_task_mut(task_id);
- task.add_sub_pass(SubPass::Masks { masks });
- }
+ if clips_range.count > 0 {
+ let masks = MaskSubPass {
+ clip_node_range: clips_range,
+ prim_spatial_node_index,
+ prim_address_f,
+ };
- surface_builder.add_child_render_task(task_id, rg_builder);
+ let task = rg_builder.get_task_mut(task_id);
+ task.add_sub_pass(SubPass::Masks { masks });
+ }
- task_id
+ task_id
+ }
+ )
}
fn add_pattern_prim(
diff --git a/gfx/wr/webrender/src/render_task_cache.rs b/gfx/wr/webrender/src/render_task_cache.rs
@@ -16,6 +16,7 @@ use crate::prim_store::gradient::{
ConicGradientCacheKey,
};
use crate::prim_store::line_dec::LineDecorationCacheKey;
+use crate::quad::QuadCacheKey;
use crate::resource_cache::CacheItem;
use std::{mem, usize, f32, i32};
use crate::surface::SurfaceBuilder;
@@ -52,6 +53,7 @@ pub enum RenderTaskCacheKeyKind {
RadialGradient(RadialGradientCacheKey),
ConicGradient(ConicGradientCacheKey),
Snapshot(SnapshotImageKey),
+ Quad(QuadCacheKey),
}
#[derive(Clone, Debug, Hash, PartialEq, Eq)]