[ tor-browser ].git.dasho

commit e55de54f2e2bbfc8e54884023fcf65af6406a291
parent 6ae6008259d9b6f7933d230627f0fcde2c949531
Author: Nicolas Silva <nical@fastmail.com>
Date:   Fri, 17 Oct 2025 08:18:25 +0000

Bug 1978773 - Implement the SWGL fast path for precise linear gradients. r=lsalzman"

Differential Revision: https://phabricator.services.mozilla.com/D268100

Diffstat:
M gfx/wr/glsl-to-cxx/src/hir.rs  | 16 ++++++++++++++++
M gfx/wr/swgl/src/swgl_ext.h  | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M gfx/wr/webrender/res/ps_quad_gradient.glsl  | 27 +++++++++++++++++++++------
M gfx/wr/webrender/src/prim_store/gradient/linear.rs  | 19 +++++++++++++++----

4 files changed, 289 insertions(+), 29 deletions(-)
diff --git a/gfx/wr/glsl-to-cxx/src/hir.rs b/gfx/wr/glsl-to-cxx/src/hir.rs
@@ -3982,6 +3982,22 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
     );
     declare_function(
         state,
+        "swgl_commitLinearGradientFromStopsRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool),
+             Type::new(Vec2), Type::new(Vec2), Type::new(Float)],
+    );
+    declare_function(
+        state,
+        "swgl_commitDitheredLinearGradientFromStopsRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool),
+             Type::new(Vec2), Type::new(Vec2), Type::new(Float), Type::new(Vec4)],
+    );
+    declare_function(
+        state,
         "swgl_commitRadialGradientRGBA8",
         None,
         Type::new(Void),
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
@@ -1349,7 +1349,8 @@ static inline int swgl_validateGradient(sampler2D sampler, ivec2_scalar address,
              : -1;
 }
 
-static inline int swgl_validateGradientFromStops(sampler2D sampler, ivec2_scalar address,
+static inline int swgl_validateGradientFromStops(sampler2D sampler,
+                                                 ivec2_scalar address,
                                                  int entries) {
   // 1px (4 floats per color stop).
   int colors_size = entries;
@@ -1621,6 +1622,7 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
       distCoeffsY = vec2_scalar{step(0.0f, posStep.y), 1.0f} * recip(posStep.y);
     }
   }
+
   for (; span > 0;) {
     // Try to process as many chunks as are within the span if possible.
     float chunks = 0.25f * span;
@@ -1799,6 +1801,183 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
   return true;
 }
 
+// Samples an entire span of a linear gradient.
+template <bool BLEND, bool DITHER>
+static bool commitLinearGradientFromStops(sampler2D sampler, int offsetsAddress,
+                                          int colorsAddress, float stopCount,
+                                          bool gradientRepeat, vec2 pos,
+                                          const vec2_scalar& scaleDir,
+                                          float startOffset, uint32_t* buf,
+                                          int span, vec4 fragCoord = vec4()) {
+  assert(sampler->format == TextureFormat::RGBA32F);
+  // Stop offsets are expected to be stored just after the colors.
+  assert(colorsAddress >= 0 && colorsAddress < offsetsAddress);
+  assert(offsetsAddress >= 0 && offsetsAddress + (stopCount + 3) / 4 <
+                                    int(sampler->height * sampler->stride));
+  float* stopOffsets = (float*)&sampler->buf[offsetsAddress];
+  Float* stopColors = (Float*)&sampler->buf[colorsAddress];
+
+  // Number of pixels per chunks.
+  const float CHUNK_SIZE = 4.0f;
+
+  // Only incremented in the case of dithering
+  // Only incremented in the case of dithering
+  int32_t currentFragCoordX = int32_t(fragCoord.x.x);
+  const auto* ditherNoiseYIndexed =
+      DITHER ? getDitherNoise(int32_t(fragCoord.y.x)) : nullptr;
+
+  // Get the pixel delta from the difference in offset steps. This represents
+  // how far within the gradient offset range we advance for every step in
+  // output.
+  vec2_scalar posStep = dFdx(pos);
+  float delta = dot(posStep, scaleDir);
+  if (!isfinite(delta)) {
+    return false;
+  }
+
+  // In order to avoid re-traversing the whole sequence of gradient stops for
+  // each sub-span when searching for the pair of stops that affect it, we keep
+  // track a recent offset+index to start the search from.
+  int32_t initialIndex = 0;
+  // This is not the real offset, what matters is that it is lower than lowest
+  // stop offset (since we start searching at index 0).
+  float initialOffset = -1.0f;
+  for (; span > 0;) {
+    // The number of pixels that are affected by the current gradient stop pair.
+    float subSpan = span;
+
+    // Compute the gradient offset from the position.
+    Float offset = pos.x * scaleDir.x + pos.y * scaleDir.y - startOffset;
+    // If repeat is desired, we need to limit the offset to a fractional value.
+    if (gradientRepeat) {
+      offset = fract(offset);
+    }
+
+    int32_t stopIndex = 0;
+    float prevOffset = 0.0;
+    float nextOffset = 0.0;
+    if (offset.x < 0) {
+      // If before the start of the gradient stop range, then use the first
+      // stop.
+      if (delta > 0) {
+        subSpan = min(subSpan, -offset.x / delta);
+      }
+    } else if (offset.x >= 1) {
+      // If beyond the end of the gradient stop range, then use the last
+      // stop.
+      stopIndex = stopCount - 1;
+      if (delta < 0) {
+        subSpan = min(subSpan, (1.0f - offset.x) / delta);
+      }
+    } else {
+      // Otherwise, we're inside the gradient stop range. Find the pair
+      // that affect the start of the current block and how many blocks
+      // are affected by the same pair.
+      stopIndex =
+          findGradientStopPair(offset.x, stopOffsets, stopCount, initialIndex,
+                               initialOffset, prevOffset, nextOffset);
+      float offsetRange =
+          delta > 0.0f ? nextOffset - offset.x : prevOffset - offset.x;
+      subSpan = min(subSpan, offsetRange / delta);
+    }
+
+    // Ensure that we advance by at least a pixel.
+    subSpan = max(ceil(subSpan), 1.0f);
+
+    // Sample the start colors of the gradient stop pair. These are scaled to
+    // a range of 0..0xFF00, as that is the largest shifted value that can fit
+    // in a U16.  Since we are only doing addition with the step value, we can
+    // still represent negative step values without having to use an explicit
+    // sign bit, as the result will still come out the same, allowing us to gain
+    // an extra bit of precision. We will later shift these into 8 bit output
+    // range while committing the span, but stepping with higher precision to
+    // avoid banding. We convert from RGBA to BGRA here to avoid doing this in
+    // the inner loop.
+    // The 256 factor is a leftover from a previous version of this code that
+    // uses a 256 pixels gradient table. The math could be simplified to avoid
+    // it but this change requires careful consideration of its interactions
+    // with the dithering code.
+    auto colorScale = (DITHER ? float(0xFF00) : 255.0f) * 256.0f;
+    auto minColorF = stopColors[stopIndex].zyxw * colorScale;
+    auto maxColorF = stopColors[stopIndex + 1].zyxw * colorScale;
+    auto deltaOffset = nextOffset - prevOffset;
+    // Get the color range of the merged gradient, normalized to its size.
+    Float colorRangeF = deltaOffset == 0.0f
+                            ? Float(0.0f)
+                            : (maxColorF - minColorF) * (1.0 / deltaOffset);
+
+    // Compute the actual starting color of the current start offset within
+    // the merged gradient. The value 0.5 is added to the low bits (0x80) so
+    // that the color will effectively round to the nearest increment below.
+    auto colorF =
+        minColorF + colorRangeF * (offset.x - prevOffset) + float(0x80);
+
+    // Compute the portion of the color range that we advance on each chunk.
+    Float deltaColorF = colorRangeF * delta * CHUNK_SIZE;
+    // Quantize the color delta and current color. These have already been
+    // scaled to the 0..0xFF00 range, so we just need to round them to U16.
+    auto deltaColor = repeat4(CONVERT(round_pixel(deltaColorF, 1), U16));
+    // If there are any amount of whole chunks of a merged gradient found,
+    // then we want to process that as a single gradient span.
+    int chunks = int(subSpan) / 4;
+    if (chunks > 0) {
+      for (int remaining = chunks;;) {
+        auto color =
+            combine(CONVERT(round_pixel(colorF, 1), U16),
+                    CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16),
+                    CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16),
+                    CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16));
+        // Finally, step the current color through the output chunks, shifting
+        // it into 8 bit range and outputting as we go. Only process a segment
+        // at a time to avoid overflowing 8-bit precision due to rounding of
+        // deltas.
+        int segment = min(remaining, 256 / 4);
+        for (auto* end = buf + segment * 4; buf < end; buf += 4) {
+          if (DITHER) {
+            commit_blend_span<BLEND>(
+                buf,
+                dither(color, currentFragCoordX, ditherNoiseYIndexed) >> 8);
+            currentFragCoordX += 4;
+          } else {
+            commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8));
+          }
+          color += deltaColor;
+        }
+        remaining -= segment;
+        colorF += deltaColorF * segment;
+        if (remaining <= 0) {
+          break;
+        }
+      }
+      span -= chunks * 4;
+      pos += posStep * float(chunks) * CHUNK_SIZE;
+    }
+
+    // We may have a partial chunk to write.
+    int remainder = int(subSpan - chunks * 4);
+    if (remainder > 0) {
+      assert(remainder < 4);
+      // The logic here is similar to the full chunks loop above, but we do a
+      // partial write instead of a pushing a full chunk.
+      auto color =
+          combine(CONVERT(round_pixel(colorF, 1), U16),
+                  CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16),
+                  CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16),
+                  CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16));
+      if (DITHER) {
+        color = dither(color, currentFragCoordX, ditherNoiseYIndexed),
+        currentFragCoordX += remainder;
+      }
+      commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8), remainder);
+
+      buf += remainder;
+      span -= remainder;
+      pos += posStep * float(remainder);
+    }
+  }
+  return true;
+}
+
 // Commits an entire span of a linear gradient, given the address of a table
 // previously resolved with swgl_validateGradient. The size of the inner portion
 // of the table is given, assuming the table start and ends with a single entry
@@ -1845,6 +2024,46 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
     }                                                                        \
   } while (0)
 
+#define swgl_commitLinearGradientFromStopsRGBA8(                             \
+    sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos,       \
+    scaleDir, startOffset)                                                   \
+  do {                                                                       \
+    bool drawn = false;                                                      \
+    if (blend_key) {                                                         \
+      drawn = commitLinearGradientFromStops<true, false>(                    \
+          sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+          scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength);            \
+    } else {                                                                 \
+      drawn = commitLinearGradientFromStops<false, false>(                   \
+          sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+          scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength);            \
+    }                                                                        \
+    if (drawn) {                                                             \
+      swgl_OutRGBA8 += swgl_SpanLength;                                      \
+      swgl_SpanLength = 0;                                                   \
+    }                                                                        \
+  } while (0)
+
+#define swgl_commitDitheredLinearGradientFromStopsRGBA8(                      \
+    sampler, offsetsAddress, colorsAddress, size, tileRepeat, gradientRepeat, \
+    pos, scaleDir, startOffset)                                               \
+  do {                                                                        \
+    bool drawn = false;                                                       \
+    if (blend_key) {                                                          \
+      drawn = commitLinearGradientFromStops<true, true>(                      \
+          sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos,  \
+          scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord);  \
+    } else {                                                                  \
+      drawn = commitLinearGradientFromStops<false, true>(                     \
+          sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos,  \
+          scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord);  \
+    }                                                                         \
+    if (drawn) {                                                              \
+      swgl_OutRGBA8 += swgl_SpanLength;                                       \
+      swgl_SpanLength = 0;                                                    \
+    }                                                                         \
+  } while (0)
+
 template <bool CLAMP, typename V>
 static ALWAYS_INLINE V fastSqrt(V v) {
   if (CLAMP) {
@@ -2247,8 +2466,8 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
     // Ensure that we are advancing by at least one pixel at each iteration.
     endT = max(ceil(endT), t + 1.0f);
 
-    // Figure out how many pixels belonging to whole chunks are inside the gradient
-    // stop pair.
+    // Figure out how many pixels belonging to whole chunks are inside the
+    // gradient stop pair.
     int inside = int(endT - t) & ~3;
     // Convert start and end colors to BGRA and scale to 0..0xFF00 range
     // (for dithered) and 0.255 range (for non-dithered).
@@ -2270,8 +2489,7 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
             : (maxColorF - minColorF) / deltaOffset;
     // Subtract off the color difference of the beginning of the current span
     // from the beginning of the gradient.
-    Float colorF =
-        minColorF - deltaColorF * (adjustedStartRadius + prevOffset);
+    Float colorF = minColorF - deltaColorF * (adjustedStartRadius + prevOffset);
     // Finally, walk over the span accumulating the position dot product and
     // getting its sqrt as an offset into the color ramp. At this point we just
     // need to round to an integer and pack down to pixel format.
@@ -2333,32 +2551,32 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
       buf += remainder;
       t += remainder;
 
-      // dotPosDelta's members are monotonically increasing, so adjusting the step only
-      // requires undoing the factor of 4 and multiplying with the actual number of
-      // remainder pixels.
+      // dotPosDelta's members are monotonically increasing, so adjusting the
+      // step only requires undoing the factor of 4 and multiplying with the
+      // actual number of remainder pixels.
       float partialDeltaDelta2 = deltaDelta2 * 0.25f * float(remainder);
       dotPosDelta += partialDeltaDelta2;
 
-      // For dotPos, however, there is a compounding effect that makes the math trickier.
-      // For simplicity's sake we are just computing the the parameters for a single-pixel
-      // step and applying it remainder times.
+      // For dotPos, however, there is a compounding effect that makes the math
+      // trickier. For simplicity's sake we are just computing the the
+      // parameters for a single-pixel step and applying it remainder times.
 
-      // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did earlier
-      // when making deltaDelta2 work for 4-pixels chunks).
+      // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did
+      // earlier when making deltaDelta2 work for 4-pixels chunks).
       float singlePxDeltaDelta2 = deltaDelta2 * 0.0625f;
-      // The first single-pixel delta for dotPos (The difference between dotPos's first
-      // two lanes).
+      // The first single-pixel delta for dotPos (The difference between
+      // dotPos's first two lanes).
       float dotPosDeltaFirst = dotPos.y - dotPos.x;
-      // For each 1-pixel step the delta is applied and monotonically increased by
-      // singleDeltaDelta2.
+      // For each 1-pixel step the delta is applied and monotonically increased
+      // by singleDeltaDelta2.
       Float pxOffsets = {0.0f, 1.0f, 2.0f, 3.0f};
       Float partialDotPosDelta =
           pxOffsets * singlePxDeltaDelta2 + dotPosDeltaFirst;
 
       // Apply each single-pixel step.
       for (int i = 0; i < remainder; ++i) {
-          dotPos += partialDotPosDelta;
-          partialDotPosDelta += singlePxDeltaDelta2;
+        dotPos += partialDotPosDelta;
+        partialDotPosDelta += singlePxDeltaDelta2;
       }
     }
   }
diff --git a/gfx/wr/webrender/res/ps_quad_gradient.glsl b/gfx/wr/webrender/res/ps_quad_gradient.glsl
@@ -359,7 +359,8 @@ vec4 pattern_fragment(vec4 color) {
 
 #if defined(SWGL_DRAW_SPAN)
 void swgl_drawSpanRGBA8() {
-    if (v_gradient_header.x != GRADIENT_KIND_RADIAL) {
+    int kind = v_gradient_header.x;
+    if (kind != GRADIENT_KIND_LINEAR && kind != GRADIENT_KIND_RADIAL) {
         return;
     }
 
@@ -379,16 +380,30 @@ void swgl_drawSpanRGBA8() {
 
     int offsets_addr = colors_addr + stop_count * 4;
     vec2 pos = v_interpolated_data.xy;
-    float start_radius = v_flat_data.x;
     bool repeat = v_gradient_header.z != 0.0;
 
+    if (kind == GRADIENT_KIND_LINEAR) {
+        vec2 scale_dir = v_flat_data.xy;
+        float start_offset = v_flat_data.z;
+
 #ifdef WR_FEATURE_DITHERING
-    swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
-                                            stop_count, repeat, pos, start_radius, gl_FragCoord);
+        swgl_commitDitheredLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+            stop_count, repeat, pos, scale_dir, start_offset, gl_FragCoord);
 #else
-    swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
-                                            stop_count, repeat, pos, start_radius);
+        swgl_commitLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+            stop_count, repeat, pos, scale_dir, start_offset);
 #endif
+    } else if (kind == GRADIENT_KIND_RADIAL) {
+      float start_radius = v_flat_data.x;
+
+#ifdef WR_FEATURE_DITHERING
+      swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+          stop_count, repeat, pos, start_radius, gl_FragCoord);
+#else
+      swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+          stop_count, repeat, pos, start_radius);
+#endif
+    }
 }
 #endif
 
diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs
@@ -13,7 +13,7 @@ use euclid::{point2, vec2, size2};
 use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU};
 use api::units::*;
 use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState, PatternKind, PatternShaderInput, PatternTextureInput};
-use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, GradientKind};
+use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, write_gpu_gradient_stops_linear, GradientKind};
 use crate::scene_building::IsVisible;
 use crate::frame_builder::FrameBuildingState;
 use crate::intern::{Internable, InternDebug, Handle as InternHandle};
@@ -111,6 +111,7 @@ impl PatternBuilder for LinearGradientTemplate {
         } else {
             (self.start_point, self.end_point)
         };
+
         linear_gradient_pattern(
             start,
             end,
@@ -794,10 +795,10 @@ pub fn linear_gradient_pattern(
     end: DevicePoint,
     extend_mode: ExtendMode,
     stops: &[GradientStop],
-    _is_software: bool,
+    is_software: bool,
     gpu_buffer_builder: &mut GpuBufferBuilder
 ) -> Pattern {
-    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), true);
+    let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), !is_software);
     let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks);
     writer.push_one([
         start.x,
@@ -812,7 +813,17 @@ pub fn linear_gradient_pattern(
         0.0,
     ]);
 
-    let is_opaque = write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer);
+    let is_opaque = if is_software {
+        // The SWGL span shaders for precise gradients can incrementally search
+        // through the stops (each search starts from where the previous one
+        // landed). So it is more efficient to store them linearly in this
+        // configuration.
+        write_gpu_gradient_stops_linear(stops, GradientKind::Linear, extend_mode, &mut writer)
+    } else {
+        // On GPUs, each pixel does its own search so we greatly benefit from
+        // the tree traversal, especially when there are many stops.
+        write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer)
+    };
     let gradient_address = writer.finish();
 
     Pattern {

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	gfx/wr/glsl-to-cxx/src/hir.rs	\|	16	++++++++++++++++
M	gfx/wr/swgl/src/swgl_ext.h	\|	256	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M	gfx/wr/webrender/res/ps_quad_gradient.glsl	\|	27	+++++++++++++++++++++------
M	gfx/wr/webrender/src/prim_store/gradient/linear.rs	\|	19	+++++++++++++++----