commit e55de54f2e2bbfc8e54884023fcf65af6406a291
parent 6ae6008259d9b6f7933d230627f0fcde2c949531
Author: Nicolas Silva <nical@fastmail.com>
Date: Fri, 17 Oct 2025 08:18:25 +0000
Bug 1978773 - Implement the SWGL fast path for precise linear gradients. r=lsalzman
Differential Revision: https://phabricator.services.mozilla.com/D268100
Diffstat:
4 files changed, 289 insertions(+), 29 deletions(-)
diff --git a/gfx/wr/glsl-to-cxx/src/hir.rs b/gfx/wr/glsl-to-cxx/src/hir.rs
@@ -3982,6 +3982,22 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
);
declare_function(
state,
+ "swgl_commitLinearGradientFromStopsRGBA8",
+ None,
+ Type::new(Void),
+ vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool),
+ Type::new(Vec2), Type::new(Vec2), Type::new(Float)],
+ );
+ declare_function(
+ state,
+ "swgl_commitDitheredLinearGradientFromStopsRGBA8",
+ None,
+ Type::new(Void),
+ vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool),
+ Type::new(Vec2), Type::new(Vec2), Type::new(Float), Type::new(Vec4)],
+ );
+ declare_function(
+ state,
"swgl_commitRadialGradientRGBA8",
None,
Type::new(Void),
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
@@ -1349,7 +1349,8 @@ static inline int swgl_validateGradient(sampler2D sampler, ivec2_scalar address,
: -1;
}
-static inline int swgl_validateGradientFromStops(sampler2D sampler, ivec2_scalar address,
+static inline int swgl_validateGradientFromStops(sampler2D sampler,
+ ivec2_scalar address,
int entries) {
// 1px (4 floats per color stop).
int colors_size = entries;
@@ -1621,6 +1622,7 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
distCoeffsY = vec2_scalar{step(0.0f, posStep.y), 1.0f} * recip(posStep.y);
}
}
+
for (; span > 0;) {
// Try to process as many chunks as are within the span if possible.
float chunks = 0.25f * span;
@@ -1799,6 +1801,183 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
return true;
}
+ // Samples an entire span of a linear gradient directly from its color stops,
+ // without going through a pre-resolved gradient table. The stop colors are
+ // read from `sampler` at colorsAddress and the stop offsets at offsetsAddress.
+ // The gradient offset of a pixel is dot(pos, scaleDir) - startOffset, wrapped
+ // with fract() when gradientRepeat is set. BLEND is forwarded to
+ // commit_blend_span and DITHER enables dithering, which is the only user of
+ // fragCoord. Writes `span` pixels to buf and returns true, or returns false
+ // without writing anything if the per-pixel offset step is not finite.
+ template <bool BLEND, bool DITHER>
+ static bool commitLinearGradientFromStops(sampler2D sampler, int offsetsAddress,
+                                           int colorsAddress, float stopCount,
+                                           bool gradientRepeat, vec2 pos,
+                                           const vec2_scalar& scaleDir,
+                                           float startOffset, uint32_t* buf,
+                                           int span, vec4 fragCoord = vec4()) {
+   assert(sampler->format == TextureFormat::RGBA32F);
+   // Stop offsets are expected to be stored just after the colors.
+   assert(colorsAddress >= 0 && colorsAddress < offsetsAddress);
+   assert(offsetsAddress >= 0 && offsetsAddress + (stopCount + 3) / 4 <
+                                     int(sampler->height * sampler->stride));
+   float* stopOffsets = (float*)&sampler->buf[offsetsAddress];
+   Float* stopColors = (Float*)&sampler->buf[colorsAddress];
+
+   // Number of pixels per chunk.
+   const float CHUNK_SIZE = 4.0f;
+
+   // Horizontal fragment coordinate of the next pixel to be written. Only
+   // incremented in the case of dithering.
+   int32_t currentFragCoordX = int32_t(fragCoord.x.x);
+   const auto* ditherNoiseYIndexed =
+       DITHER ? getDitherNoise(int32_t(fragCoord.y.x)) : nullptr;
+
+   // Get the pixel delta from the difference in offset steps. This represents
+   // how far within the gradient offset range we advance for every step in
+   // output.
+   vec2_scalar posStep = dFdx(pos);
+   float delta = dot(posStep, scaleDir);
+   if (!isfinite(delta)) {
+     return false;
+   }
+
+   // In order to avoid re-traversing the whole sequence of gradient stops for
+   // each sub-span when searching for the pair of stops that affect it, we keep
+   // track of a recent offset+index to start the search from.
+   int32_t initialIndex = 0;
+   // This is not the real offset, what matters is that it is lower than lowest
+   // stop offset (since we start searching at index 0).
+   float initialOffset = -1.0f;
+   for (; span > 0;) {
+     // The number of pixels that are affected by the current gradient stop pair.
+     float subSpan = span;
+
+     // Compute the gradient offset from the position.
+     Float offset = pos.x * scaleDir.x + pos.y * scaleDir.y - startOffset;
+     // If repeat is desired, we need to limit the offset to a fractional value.
+     if (gradientRepeat) {
+       offset = fract(offset);
+     }
+
+     int32_t stopIndex = 0;
+     float prevOffset = 0.0;
+     float nextOffset = 0.0;
+     if (offset.x < 0) {
+       // If before the start of the gradient stop range, then use the first
+       // stop.
+       if (delta > 0) {
+         subSpan = min(subSpan, -offset.x / delta);
+       }
+     } else if (offset.x >= 1) {
+       // If beyond the end of the gradient stop range, then use the last
+       // stop.
+       stopIndex = stopCount - 1;
+       if (delta < 0) {
+         subSpan = min(subSpan, (1.0f - offset.x) / delta);
+       }
+     } else {
+       // Otherwise, we're inside the gradient stop range. Find the pair
+       // that affect the start of the current block and how many blocks
+       // are affected by the same pair.
+       stopIndex =
+           findGradientStopPair(offset.x, stopOffsets, stopCount, initialIndex,
+                                initialOffset, prevOffset, nextOffset);
+       // When delta is zero the offset does not change along the span (the
+       // gradient axis is perpendicular to it), so the current stop pair covers
+       // every remaining pixel. Skipping the division also avoids producing
+       // -inf or NaN, which would either force single-pixel steps or reach the
+       // int conversions below.
+       if (delta != 0.0f) {
+         float offsetRange =
+             delta > 0.0f ? nextOffset - offset.x : prevOffset - offset.x;
+         subSpan = min(subSpan, offsetRange / delta);
+       }
+     }
+
+     // Ensure that we advance by at least a pixel.
+     subSpan = max(ceil(subSpan), 1.0f);
+
+     // Sample the start colors of the gradient stop pair. These are scaled to
+     // a range of 0..0xFF00, as that is the largest shifted value that can fit
+     // in a U16. Since we are only doing addition with the step value, we can
+     // still represent negative step values without having to use an explicit
+     // sign bit, as the result will still come out the same, allowing us to gain
+     // an extra bit of precision. We will later shift these into 8 bit output
+     // range while committing the span, but stepping with higher precision to
+     // avoid banding. We convert from RGBA to BGRA here to avoid doing this in
+     // the inner loop.
+     // The 256 factor is a leftover from a previous version of this code that
+     // uses a 256 pixels gradient table. The math could be simplified to avoid
+     // it but this change requires careful consideration of its interactions
+     // with the dithering code.
+     auto colorScale = (DITHER ? float(0xFF00) : 255.0f) * 256.0f;
+     auto minColorF = stopColors[stopIndex].zyxw * colorScale;
+     auto maxColorF = stopColors[stopIndex + 1].zyxw * colorScale;
+     auto deltaOffset = nextOffset - prevOffset;
+     // Get the color range of the merged gradient, normalized to its size.
+     Float colorRangeF = deltaOffset == 0.0f
+                             ? Float(0.0f)
+                             : (maxColorF - minColorF) * (1.0 / deltaOffset);
+
+     // Compute the actual starting color of the current start offset within
+     // the merged gradient. The value 0.5 is added to the low bits (0x80) so
+     // that the color will effectively round to the nearest increment below.
+     auto colorF =
+         minColorF + colorRangeF * (offset.x - prevOffset) + float(0x80);
+
+     // Compute the portion of the color range that we advance on each chunk.
+     Float deltaColorF = colorRangeF * delta * CHUNK_SIZE;
+     // Quantize the color delta and current color. These have already been
+     // scaled to the 0..0xFF00 range, so we just need to round them to U16.
+     auto deltaColor = repeat4(CONVERT(round_pixel(deltaColorF, 1), U16));
+     // If there are any amount of whole chunks of a merged gradient found,
+     // then we want to process that as a single gradient span.
+     int chunks = int(subSpan) / 4;
+     if (chunks > 0) {
+       for (int remaining = chunks;;) {
+         auto color =
+             combine(CONVERT(round_pixel(colorF, 1), U16),
+                     CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16),
+                     CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16),
+                     CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16));
+         // Finally, step the current color through the output chunks, shifting
+         // it into 8 bit range and outputting as we go. Only process a segment
+         // at a time to avoid overflowing 8-bit precision due to rounding of
+         // deltas.
+         int segment = min(remaining, 256 / 4);
+         for (auto* end = buf + segment * 4; buf < end; buf += 4) {
+           if (DITHER) {
+             commit_blend_span<BLEND>(
+                 buf,
+                 dither(color, currentFragCoordX, ditherNoiseYIndexed) >> 8);
+             currentFragCoordX += 4;
+           } else {
+             commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8));
+           }
+           color += deltaColor;
+         }
+         remaining -= segment;
+         colorF += deltaColorF * segment;
+         if (remaining <= 0) {
+           break;
+         }
+       }
+       span -= chunks * 4;
+       pos += posStep * float(chunks) * CHUNK_SIZE;
+     }
+
+     // We may have a partial chunk to write.
+     int remainder = int(subSpan - chunks * 4);
+     if (remainder > 0) {
+       assert(remainder < 4);
+       // The logic here is similar to the full chunks loop above, but we do a
+       // partial write instead of pushing a full chunk.
+       auto color =
+           combine(CONVERT(round_pixel(colorF, 1), U16),
+                   CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16),
+                   CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16),
+                   CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16));
+       if (DITHER) {
+         color = dither(color, currentFragCoordX, ditherNoiseYIndexed);
+         currentFragCoordX += remainder;
+       }
+       commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8), remainder);
+
+       buf += remainder;
+       span -= remainder;
+       pos += posStep * float(remainder);
+     }
+   }
+   return true;
+ }
+
+
// Commits an entire span of a linear gradient, given the address of a table
// previously resolved with swgl_validateGradient. The size of the inner portion
// of the table is given, assuming the table start and ends with a single entry
@@ -1845,6 +2024,46 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
} \
} while (0)
+ // Span-shader entry point for undithered linear gradients sampled directly
+ // from their stops. Forwards to commitLinearGradientFromStops, enabling
+ // blending when blend_key is set. If the span was drawn, the output pointer is
+ // advanced past it and the remaining span length is set to zero.
+#define swgl_commitLinearGradientFromStopsRGBA8( \
+     sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+     scaleDir, startOffset) \
+   do { \
+     const bool drawn = \
+         blend_key ? commitLinearGradientFromStops<true, false>( \
+                         sampler, offsetsAddress, colorsAddress, size, \
+                         gradientRepeat, pos, scaleDir, startOffset, \
+                         swgl_OutRGBA8, swgl_SpanLength) \
+                   : commitLinearGradientFromStops<false, false>( \
+                         sampler, offsetsAddress, colorsAddress, size, \
+                         gradientRepeat, pos, scaleDir, startOffset, \
+                         swgl_OutRGBA8, swgl_SpanLength); \
+     if (drawn) { \
+       swgl_OutRGBA8 += swgl_SpanLength; \
+       swgl_SpanLength = 0; \
+     } \
+   } while (0)
+
+ // Span-shader entry point for dithered linear gradients sampled directly from
+ // their stops. Forwards to commitLinearGradientFromStops with dithering
+ // enabled, and with blending enabled when blend_key is set. fragCoord is the
+ // fragment coordinate handed to the dithering code. If the span was drawn,
+ // the output pointer is advanced past it and the remaining span length is set
+ // to zero.
+ // The parameter list must match the GLSL-side declaration (sampler, two
+ // addresses, stop count, repeat flag, position, scale direction, start
+ // offset, fragment coordinate) so that each argument binds to the name used
+ // in the body.
+#define swgl_commitDitheredLinearGradientFromStopsRGBA8( \
+     sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+     scaleDir, startOffset, fragCoord) \
+   do { \
+     bool drawn = false; \
+     if (blend_key) { \
+       drawn = commitLinearGradientFromStops<true, true>( \
+           sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+           scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord); \
+     } else { \
+       drawn = commitLinearGradientFromStops<false, true>( \
+           sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \
+           scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord); \
+     } \
+     if (drawn) { \
+       swgl_OutRGBA8 += swgl_SpanLength; \
+       swgl_SpanLength = 0; \
+     } \
+   } while (0)
+
template <bool CLAMP, typename V>
static ALWAYS_INLINE V fastSqrt(V v) {
if (CLAMP) {
@@ -2247,8 +2466,8 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
// Ensure that we are advancing by at least one pixel at each iteration.
endT = max(ceil(endT), t + 1.0f);
- // Figure out how many pixels belonging to whole chunks are inside the gradient
- // stop pair.
+ // Figure out how many pixels belonging to whole chunks are inside the
+ // gradient stop pair.
int inside = int(endT - t) & ~3;
// Convert start and end colors to BGRA and scale to 0..0xFF00 range
// (for dithered) and 0.255 range (for non-dithered).
@@ -2270,8 +2489,7 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
: (maxColorF - minColorF) / deltaOffset;
// Subtract off the color difference of the beginning of the current span
// from the beginning of the gradient.
- Float colorF =
- minColorF - deltaColorF * (adjustedStartRadius + prevOffset);
+ Float colorF = minColorF - deltaColorF * (adjustedStartRadius + prevOffset);
// Finally, walk over the span accumulating the position dot product and
// getting its sqrt as an offset into the color ramp. At this point we just
// need to round to an integer and pack down to pixel format.
@@ -2333,32 +2551,32 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
buf += remainder;
t += remainder;
- // dotPosDelta's members are monotonically increasing, so adjusting the step only
- // requires undoing the factor of 4 and multiplying with the actual number of
- // remainder pixels.
+ // dotPosDelta's members are monotonically increasing, so adjusting the
+ // step only requires undoing the factor of 4 and multiplying with the
+ // actual number of remainder pixels.
float partialDeltaDelta2 = deltaDelta2 * 0.25f * float(remainder);
dotPosDelta += partialDeltaDelta2;
- // For dotPos, however, there is a compounding effect that makes the math trickier.
- // For simplicity's sake we are just computing the the parameters for a single-pixel
- // step and applying it remainder times.
+ // For dotPos, however, there is a compounding effect that makes the math
+ // trickier. For simplicity's sake we are just computing the
+ // parameters for a single-pixel step and applying it remainder times.
- // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did earlier
- // when making deltaDelta2 work for 4-pixels chunks).
+ // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did
+ // earlier when making deltaDelta2 work for 4-pixels chunks).
float singlePxDeltaDelta2 = deltaDelta2 * 0.0625f;
- // The first single-pixel delta for dotPos (The difference between dotPos's first
- // two lanes).
+ // The first single-pixel delta for dotPos (The difference between
+ // dotPos's first two lanes).
float dotPosDeltaFirst = dotPos.y - dotPos.x;
- // For each 1-pixel step the delta is applied and monotonically increased by
- // singleDeltaDelta2.
+ // For each 1-pixel step the delta is applied and monotonically increased
+ // by singlePxDeltaDelta2.
Float pxOffsets = {0.0f, 1.0f, 2.0f, 3.0f};
Float partialDotPosDelta =
pxOffsets * singlePxDeltaDelta2 + dotPosDeltaFirst;
// Apply each single-pixel step.
for (int i = 0; i < remainder; ++i) {
- dotPos += partialDotPosDelta;
- partialDotPosDelta += singlePxDeltaDelta2;
+ dotPos += partialDotPosDelta;
+ partialDotPosDelta += singlePxDeltaDelta2;
}
}
}
diff --git a/gfx/wr/webrender/res/ps_quad_gradient.glsl b/gfx/wr/webrender/res/ps_quad_gradient.glsl
@@ -359,7 +359,8 @@ vec4 pattern_fragment(vec4 color) {
#if defined(SWGL_DRAW_SPAN)
void swgl_drawSpanRGBA8() {
- if (v_gradient_header.x != GRADIENT_KIND_RADIAL) {
+ int kind = v_gradient_header.x;
+ if (kind != GRADIENT_KIND_LINEAR && kind != GRADIENT_KIND_RADIAL) {
return;
}
@@ -379,16 +380,30 @@ void swgl_drawSpanRGBA8() {
int offsets_addr = colors_addr + stop_count * 4;
vec2 pos = v_interpolated_data.xy;
- float start_radius = v_flat_data.x;
bool repeat = v_gradient_header.z != 0.0;
+ if (kind == GRADIENT_KIND_LINEAR) {
+ vec2 scale_dir = v_flat_data.xy;
+ float start_offset = v_flat_data.z;
+
#ifdef WR_FEATURE_DITHERING
- swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
- stop_count, repeat, pos, start_radius, gl_FragCoord);
+ swgl_commitDitheredLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+ stop_count, repeat, pos, scale_dir, start_offset, gl_FragCoord);
#else
- swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
- stop_count, repeat, pos, start_radius);
+ swgl_commitLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+ stop_count, repeat, pos, scale_dir, start_offset);
#endif
+ } else if (kind == GRADIENT_KIND_RADIAL) {
+ float start_radius = v_flat_data.x;
+
+#ifdef WR_FEATURE_DITHERING
+ swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+ stop_count, repeat, pos, start_radius, gl_FragCoord);
+#else
+ swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+ stop_count, repeat, pos, start_radius);
+#endif
+ }
}
#endif
diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs b/gfx/wr/webrender/src/prim_store/gradient/linear.rs
@@ -13,7 +13,7 @@ use euclid::{point2, vec2, size2};
use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU};
use api::units::*;
use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState, PatternKind, PatternShaderInput, PatternTextureInput};
-use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, GradientKind};
+use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, write_gpu_gradient_stops_linear, GradientKind};
use crate::scene_building::IsVisible;
use crate::frame_builder::FrameBuildingState;
use crate::intern::{Internable, InternDebug, Handle as InternHandle};
@@ -111,6 +111,7 @@ impl PatternBuilder for LinearGradientTemplate {
} else {
(self.start_point, self.end_point)
};
+
linear_gradient_pattern(
start,
end,
@@ -794,10 +795,10 @@ pub fn linear_gradient_pattern(
end: DevicePoint,
extend_mode: ExtendMode,
stops: &[GradientStop],
- _is_software: bool,
+ is_software: bool,
gpu_buffer_builder: &mut GpuBufferBuilder
) -> Pattern {
- let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), true);
+ let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), !is_software);
let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks);
writer.push_one([
start.x,
@@ -812,7 +813,17 @@ pub fn linear_gradient_pattern(
0.0,
]);
- let is_opaque = write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer);
+ let is_opaque = if is_software {
+ // The SWGL span shaders for precise gradients can incrementally search
+ // through the stops (each search starts from where the previous one
+ // landed). So it is more efficient to store them linearly in this
+ // configuration.
+ write_gpu_gradient_stops_linear(stops, GradientKind::Linear, extend_mode, &mut writer)
+ } else {
+ // On GPUs, each pixel does its own search so we greatly benefit from
+ // the tree traversal, especially when there are many stops.
+ write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer)
+ };
let gradient_address = writer.finish();
Pattern {