[ tor-browser ].git.dasho

commit 8518228686575a3cef794d66c605557afb28e0ad
parent 5759a1c8dad49b6be7326a1d67308b34cdc7c77c
Author: Nicolas Silva <nical@fastmail.com>
Date:   Thu, 27 Nov 2025 08:15:16 +0000

Bug 1978773 - Do a tree traversal of gradient stops in SWGL. r=lsalzman

This patch replaces the linear search in the new SWGL gradient paths with the same tree traversal as the GPU version.
The linear version had the advantage of being incremental, so within a span, searches were very fast after the first one. Unfortunately it required the SWGL and GPU paths to use different representations for gradient stop offsets which led to issues when accidentally falling back to the translated glsl code in SWGL.
This version is not incremental so we do the full search each time the span crosses a new gradient stop. This should only matter when the number of gradient stops is high. The tree structure very aggressively converges, though. For example it takes 3 iterations to find a pair in a 124 stops gradient.

Differential Revision: https://phabricator.services.mozilla.com/D274008

Diffstat:
M gfx/wr/swgl/src/swgl_ext.h  | 171 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
M gfx/wr/webrender/res/ps_quad_gradient.glsl  | 2 +-
M gfx/wr/webrender/src/prim_store/gradient/conic.rs  | 2 +-
M gfx/wr/webrender/src/prim_store/gradient/linear.rs  | 19 +++++--------------
M gfx/wr/webrender/src/prim_store/gradient/mod.rs  | 74 ++++++++++++++------------------------------------------------------------
M gfx/wr/webrender/src/prim_store/gradient/radial.rs  | 19 +++++--------------

6 files changed, 115 insertions(+), 172 deletions(-)
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
@@ -1510,79 +1510,101 @@ static ALWAYS_INLINE WideRGBA8 dither(WideRGBA8 color, int32_t fragCoordX,
   return color + ditherNoiseYIndexed[fragCoordX & 7];
 }
 
-// Finds the gradient stop pair that affects the provided offset.
-//
-// The returned index corresponds to the gradient stop immediately before the
-// current offset (in increasing offset order), so the gradient stop pair
-// is defined by [index, index + 1].
-//
-// Output parameters:
-//  - (out) prevOffset is set to the offset of the returned stop.
-//  - (out) nextOffset is set to the offset of the following stop.
-//  - (in/out) initialIndex is the index at which to start the search.
-//    It is updated by this function.
-//  - (in/out) initialOffset is the offset of the stop at initalOffset.
-//    It is updated by this function.
+/// Find the gradient stops pair affecting the current offset by searching
+/// into gradient stop offsets organized in a tree structure.
+///
+/// This is ported from sample_gradient_stops_tree in ps_quad_gradient.glsl.
+/// The tree structure is explained in the documentation of
+/// write_gpu_gradient_stops_tree in prim_store/gradient/mod.rs
 static int32_t findGradientStopPair(float offset, float* stops,
-                                    int32_t numStops, int32_t& initialIndex,
-                                    float& initialOffset, float& prevOffset,
+                                    int32_t numStops,
+                                    float& prevOffset,
                                     float& nextOffset) {
-  int32_t index = initialIndex;
-
-  // Walk forward or backward depending on where the target offset is relative
-  // to the initial offset.
-  if (offset >= initialOffset) {
-    // Walk the gradient stops forward
-    float next = stops[initialIndex];
-    float prev = stops[max(initialIndex - 1, 0)];
-    while (index < numStops) {
-      if (next > offset) {
-        break;
-      }
-
-      index += 1;
-      prev = next;
-      next = stops[index];
+    int32_t levelBaseAddr = 0;
+    // Number of blocks of 4 indices for the current level.
+    // At the root, a single block is stored. Each level stores
+    // 5 times more blocks than the previous one.
+    int32_t levelStride = 1;
+    // Relative address within the current level.
+    int32_t offsetInLevel = 0;
+    // By the end of this function, this will contain the index of the
+    // second stop of the pair we are looking for.
+    int32_t index = 0;
+
+    // The index distance between consecutive stop offsets at
+    // the current level. At the last level, the stride is 1.
+    // each has a 5 times more stride than the next (so the
+    // index stride starts high and is divided by 5 at each
+    // iteration).
+    int32_t indexStride = 1;
+    while (indexStride * 5 <= numStops) {
+        indexStride *= 5;
     }
 
-    // We wither:
-    //  - Walked 1 stop past the one we are looking for, so we need to
-    //    adjust the index by decrementing it.
-    //  - Walked past the last stop so the index is out of bounds. We
-    //    don't *have* to decrement it since we are going to clamp it
-    //    at the end of the function but doing it does not break the
-    //    logic either.
-    index -= 1;
-
-    prevOffset = prev;
-    nextOffset = next;
-  } else {
-    // Walk back.
-    float next = stops[initialIndex];
-    float prev = stops[min(initialIndex + 1, numStops - 1)];
-    while (index > 0) {
-      if (next < offset) {
-        break;
-      }
 
-      index -= 1;
-      prev = next;
-      next = stops[index];
-    }
+    // We take advantage of the fact that stop offsets are normalized from
+    // 0 to 1 which means that the first offset is always 0 and the last is
+    // always 1.
+    // This is important because in the loop, we won't be setting prevOffset
+    // if offset is < 0.0 and won't be setting nextOffset if offset > 1.0,
+    // so initializing them this way here handles those cases.
+    prevOffset = 0.0;
+    nextOffset = 1.0;
+
+    while (true) {
+        int32_t addr = (levelBaseAddr + offsetInLevel) * 4;
+        float currentStops0 = stops[addr];
+        float currentStops1 = stops[addr + 1];
+        float currentStops2 = stops[addr + 2];
+        float currentStops3 = stops[addr + 3];
+
+        // Determine which of the five partitions (sub-trees)
+        // to take next.
+        int32_t nextPartition = 4;
+        if (currentStops0 > offset) {
+            nextPartition = 0;
+            nextOffset = currentStops0;
+        } else if (currentStops1 > offset) {
+            nextPartition = 1;
+            prevOffset = currentStops0;
+            nextOffset = currentStops1;
+        } else if (currentStops2 > offset) {
+            nextPartition = 2;
+            prevOffset = currentStops1;
+            nextOffset = currentStops2;
+        } else if (currentStops3 > offset) {
+            nextPartition = 3;
+            prevOffset = currentStops2;
+            nextOffset = currentStops3;
+        } else {
+            prevOffset = currentStops3;
+        }
 
-    // Since we are walking backwards, prev and next are swapped.
-    prevOffset = next;
-    nextOffset = prev;
-  }
+        index += nextPartition * indexStride;
+
+        if (indexStride == 1) {
+            // If the index stride is 1, we visited a leaf,
+            // we are done.
+            break;
+        }
 
-  index = clamp(index, 0, numStops - 2);
+        indexStride /= 5;
+        levelBaseAddr += levelStride;
+        levelStride *= 5;
+        offsetInLevel = offsetInLevel * 5 + nextPartition;
+    }
 
-  initialIndex = index;
-  initialOffset = prevOffset;
+    // clamp the index to [1..numStops]
+    if (index < 1) {
+        index = 1;
+    } else if (index > numStops - 1) {
+        index = numStops - 1;
+    }
 
-  return index;
+    return index - 1;
 }
 
+
 // Samples an entire span of a linear gradient by crawling the gradient table
 // and looking for consecutive stops that can be merged into a single larger
 // gradient, then interpolating between those larger gradients within the span.
@@ -1835,13 +1857,6 @@ static bool commitLinearGradientFromStops(sampler2D sampler, int offsetsAddress,
     return false;
   }
 
-  // In order to avoid re-traversing the whole sequence of gradient stops for
-  // each sub-span when searching for the pair of stops that affect it, we keep
-  // track a recent offset+index to start the search from.
-  int32_t initialIndex = 0;
-  // This is not the real offset, what matters is that it is lower than lowest
-  // stop offset (since we start searching at index 0).
-  float initialOffset = -1.0f;
   for (; span > 0;) {
     // The number of pixels that are affected by the current gradient stop pair.
     float subSpan = span;
@@ -1874,8 +1889,8 @@ static bool commitLinearGradientFromStops(sampler2D sampler, int offsetsAddress,
       // that affect the start of the current block and how many blocks
       // are affected by the same pair.
       stopIndex =
-          findGradientStopPair(offset.x, stopOffsets, stopCount, initialIndex,
-                               initialOffset, prevOffset, nextOffset);
+          findGradientStopPair(offset.x, stopOffsets, stopCount,
+                               prevOffset, nextOffset);
       float offsetRange =
           delta > 0.0f ? nextOffset - offset.x : prevOffset - offset.x;
       subSpan = min(subSpan, offsetRange / delta);
@@ -2391,15 +2406,7 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
   Float dotPos = dot(pos, pos);
   Float dotPosDelta = 2.0f * dot(pos, delta) + deltaDelta;
   float deltaDelta2 = 2.0f * deltaDelta;
-  // In order to avoid re-traversing the whole sequence of gradient stops for
-  // each sub-span when searching for the pair of stops that affect it, we keep
-  // track a recent offset+index to start the search from. We start from the
-  // last stop since it is more likely that the edge of the quad is away from
-  // the center than close to it.
-  int32_t initialIndex = stopCount - 1;
-  // This is not the real offset what matters is that it is greater than the
-  // outermost one.
-  float initialOffset = 2.0f;
+
   for (int t = 0; t < span;) {
     // Compute the gradient table offset from the current position.
     Float offset = fastSqrt<true>(dotPos) - startRadius;
@@ -2440,8 +2447,8 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
       // Otherwise, we're inside the valid part of the gradient table.
 
       stopIndex =
-          findGradientStopPair(offset.x, stopOffsets, stopCount, initialIndex,
-                               initialOffset, prevOffset, nextOffset);
+          findGradientStopPair(offset.x, stopOffsets, stopCount,
+                                   prevOffset, nextOffset);
       if (t >= middleT) {
         intercept = adjustedStartRadius + nextOffset;
       } else {
diff --git a/gfx/wr/webrender/res/ps_quad_gradient.glsl b/gfx/wr/webrender/res/ps_quad_gradient.glsl
@@ -193,7 +193,7 @@ vec4 sample_gradient_stops_tree(float offset) {
     // The index distance between consecutive stop offsets at
     // the current level. At the last level, the stride is 1.
     // each has a 5 times more stride than the next (so the
-    // index stride starts high and is devided by 5 at each
+    // index stride starts high and is divided by 5 at each
     // iteration).
     int index_stride = 1;
     while (index_stride * 5 <= count) {
diff --git a/gfx/wr/webrender/src/prim_store/gradient/conic.rs b/gfx/wr/webrender/src/prim_store/gradient/conic.rs
@@ -493,7 +493,7 @@ pub fn conic_gradient_pattern(
     stops: &[GradientStop],
     gpu_buffer_builder: &mut GpuBufferBuilder
 ) -> Pattern {
-    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), true);
+    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len());
     let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks);
     writer.push_one([
         center.x,
diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs
@@ -13,7 +13,7 @@ use euclid::{point2, vec2, size2};
 use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU};
 use api::units::*;
 use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState, PatternKind, PatternShaderInput, PatternTextureInput};
-use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, write_gpu_gradient_stops_linear, GradientKind};
+use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, GradientKind};
 use crate::scene_building::IsVisible;
 use crate::frame_builder::FrameBuildingState;
 use crate::intern::{Internable, InternDebug, Handle as InternHandle};
@@ -795,10 +795,10 @@ pub fn linear_gradient_pattern(
     end: DevicePoint,
     extend_mode: ExtendMode,
     stops: &[GradientStop],
-    is_software: bool,
+    _is_software: bool,
     gpu_buffer_builder: &mut GpuBufferBuilder
 ) -> Pattern {
-    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), !is_software);
+    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len());
     let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks);
     writer.push_one([
         start.x,
@@ -813,17 +813,8 @@ pub fn linear_gradient_pattern(
         0.0,
     ]);
 
-    let is_opaque = if is_software {
-        // The SWGL span shaders for precise gradients can incrementally search
-        // through the stops (each search starts from where the previous one
-        // landed). So it is more efficient to store them linearly in this
-        // configuration.
-        write_gpu_gradient_stops_linear(stops, GradientKind::Linear, extend_mode, &mut writer)
-    } else {
-        // On GPUs, each pixel does its own search so we greatly benefit from
-        // the tree traversal, especially when there are many stops.
-        write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer)
-    };
+    let is_opaque = write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer);
+
     let gradient_address = writer.finish();
 
     Pattern {
diff --git a/gfx/wr/webrender/src/prim_store/gradient/mod.rs b/gfx/wr/webrender/src/prim_store/gradient/mod.rs
@@ -94,46 +94,6 @@ fn write_gpu_gradient_stops_header_and_colors(
     is_opaque
 }
 
-/// Builds the gpu representation for common gradient parameters and
-/// returns whether the gradient is fully opaque.
-///
-/// The format is:
-///
-/// ```ascii
-///
-/// [count, extend_mode, <padding>, color0.r, color0.g, color0.b, color0.a, ..., offset0, offset1, ..., <padding>]
-/// |_____________________________| |__________________________________________| |_______________________________|
-///        header: vec4                        colors: [vec4; n]                     offsets: [vec4; ceil(n/4)]
-/// ```
-///
-/// Packed contiguously such that each portion is 4-floats aligned to facilitate
-/// reading them from the gpu buffer.
-fn write_gpu_gradient_stops_linear(
-    stops: &[GradientStop],
-    kind: GradientKind,
-    extend_mode: ExtendMode,
-    writer: &mut GpuBufferWriterF,
-) -> bool {
-    let is_opaque = write_gpu_gradient_stops_header_and_colors(
-        stops,
-        kind,
-        extend_mode,
-        writer
-    );
-
-    for chunk in stops.chunks(4) {
-        let mut block = [0.0; 4];
-        let mut i = 0;
-        for stop in chunk {
-            block[i] = stop.offset;
-            i += 1;
-        }
-        writer.push_one(block);
-    }
-
-    is_opaque
-}
-
 // Push stop offsets in rearranged order so that the search can be carried
 // out as an implicit tree traversal.
 //
@@ -245,31 +205,25 @@ fn write_gpu_gradient_stops_tree(
     return is_opaque;
 }
 
-fn gpu_gradient_stops_blocks(num_stops: usize, tree_traversal: bool) -> usize {
+fn gpu_gradient_stops_blocks(num_stops: usize) -> usize {
     let header_blocks = 1;
     let color_blocks = num_stops;
 
-    // When using a linear traversal we need 1/4th of the number of offsets,
-    // rounded up (since we store 4 stop offsets per block).
-    let mut offset_blocks = (num_stops + 3) / 4;
-
-    if tree_traversal {
-        // If this is changed, matching changes should be made to the
-        // equivalent code in write_gpu_gradient_stops_tree.
-        let mut num_blocks_for_level = 1;
-        offset_blocks = 1;
-        while offset_blocks * 4 < num_stops {
-            num_blocks_for_level *= 5;
-            offset_blocks += num_blocks_for_level;
-        }
-
-        // Fix the capacity up to account for the fact that we don't
-        // store the entirety of the last level;
-        let num_blocks_for_last_level = num_blocks_for_level.min(num_stops / 5 + 1);
-        offset_blocks -= num_blocks_for_level;
-        offset_blocks += num_blocks_for_last_level;
+    // If this is changed, matching changes should be made to the
+    // equivalent code in write_gpu_gradient_stops_tree.
+    let mut num_blocks_for_level = 1;
+    let mut offset_blocks = 1;
+    while offset_blocks * 4 < num_stops {
+        num_blocks_for_level *= 5;
+        offset_blocks += num_blocks_for_level;
     }
 
+    // Fix the capacity up to account for the fact that we don't
+    // store the entirety of the last level;
+    let num_blocks_for_last_level = num_blocks_for_level.min(num_stops / 5 + 1);
+    offset_blocks -= num_blocks_for_level;
+    offset_blocks += num_blocks_for_last_level;
+
     header_blocks + color_blocks + offset_blocks
 }
 
diff --git a/gfx/wr/webrender/src/prim_store/gradient/radial.rs b/gfx/wr/webrender/src/prim_store/gradient/radial.rs
@@ -30,7 +30,7 @@ use std::{hash, ops::{Deref, DerefMut}};
 use super::{
     stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder,
     apply_gradient_local_clip, gpu_gradient_stops_blocks,
-    write_gpu_gradient_stops_linear, write_gpu_gradient_stops_tree,
+    write_gpu_gradient_stops_tree,
 };
 
 /// Hashable radial gradient parameters, for use during prim interning.
@@ -626,10 +626,10 @@ pub fn radial_gradient_pattern(
     params: &RadialGradientParams,
     extend_mode: ExtendMode,
     stops: &[GradientStop],
-    is_software: bool,
+    _is_software: bool,
     gpu_buffer_builder: &mut GpuBufferBuilder
 ) -> Pattern {
-    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), !is_software);
+    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len());
     let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks);
     writer.push_one([
         center.x,
@@ -644,17 +644,8 @@ pub fn radial_gradient_pattern(
         0.0,
     ]);
 
-    let is_opaque = if is_software {
-        // The SWGL span shaders for precise gradients can incrementally search
-        // through the stops (each search starts from where the previous one
-        // landed). So it is more efficient to store them linearly in this
-        // configuration.
-        write_gpu_gradient_stops_linear(stops, GradientKind::Radial, extend_mode, &mut writer)
-    } else {
-        // On GPUs, each pixel does its own search so we greatly benefit from
-        // the tree traversal, especially when there are many stops.
-        write_gpu_gradient_stops_tree(stops, GradientKind::Radial, extend_mode, &mut writer)
-    };
+    let is_opaque = write_gpu_gradient_stops_tree(stops, GradientKind::Radial, extend_mode, &mut writer);
+
     let gradient_address = writer.finish();
 
     Pattern {

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	gfx/wr/swgl/src/swgl_ext.h	\|	171	+++++++++++++++++++++++++++++++++++++++++--------------------------------------
M	gfx/wr/webrender/res/ps_quad_gradient.glsl	\|	2	+-
M	gfx/wr/webrender/src/prim_store/gradient/conic.rs	\|	2	+-
M	gfx/wr/webrender/src/prim_store/gradient/linear.rs	\|	19	+++++--------------
M	gfx/wr/webrender/src/prim_store/gradient/mod.rs	\|	74	++++++++++++++------------------------------------------------------------
M	gfx/wr/webrender/src/prim_store/gradient/radial.rs	\|	19	+++++--------------