tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit e55de54f2e2bbfc8e54884023fcf65af6406a291
parent 6ae6008259d9b6f7933d230627f0fcde2c949531
Author: Nicolas Silva <nical@fastmail.com>
Date:   Fri, 17 Oct 2025 08:18:25 +0000

Bug 1978773 - Implement the SWGL fast path for precise linear gradients. r=lsalzman

Differential Revision: https://phabricator.services.mozilla.com/D268100

Diffstat:
Mgfx/wr/glsl-to-cxx/src/hir.rs | 16++++++++++++++++
Mgfx/wr/swgl/src/swgl_ext.h | 256+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
Mgfx/wr/webrender/res/ps_quad_gradient.glsl | 27+++++++++++++++++++++------
Mgfx/wr/webrender/src/prim_store/gradient/linear.rs | 19+++++++++++++++----
4 files changed, 289 insertions(+), 29 deletions(-)

diff --git a/gfx/wr/glsl-to-cxx/src/hir.rs b/gfx/wr/glsl-to-cxx/src/hir.rs @@ -3982,6 +3982,22 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio ); declare_function( state, + "swgl_commitLinearGradientFromStopsRGBA8", + None, + Type::new(Void), + vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool), + Type::new(Vec2), Type::new(Vec2), Type::new(Float)], + ); + declare_function( + state, + "swgl_commitDitheredLinearGradientFromStopsRGBA8", + None, + Type::new(Void), + vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool), + Type::new(Vec2), Type::new(Vec2), Type::new(Float), Type::new(Vec4)], + ); + declare_function( + state, "swgl_commitRadialGradientRGBA8", None, Type::new(Void), diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h @@ -1349,7 +1349,8 @@ static inline int swgl_validateGradient(sampler2D sampler, ivec2_scalar address, : -1; } -static inline int swgl_validateGradientFromStops(sampler2D sampler, ivec2_scalar address, +static inline int swgl_validateGradientFromStops(sampler2D sampler, + ivec2_scalar address, int entries) { // 1px (4 floats per color stop). int colors_size = entries; @@ -1621,6 +1622,7 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size, distCoeffsY = vec2_scalar{step(0.0f, posStep.y), 1.0f} * recip(posStep.y); } } + for (; span > 0;) { // Try to process as many chunks as are within the span if possible. float chunks = 0.25f * span; @@ -1799,6 +1801,183 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size, return true; } +// Samples an entire span of a linear gradient. 
+template <bool BLEND, bool DITHER> +static bool commitLinearGradientFromStops(sampler2D sampler, int offsetsAddress, + int colorsAddress, float stopCount, + bool gradientRepeat, vec2 pos, + const vec2_scalar& scaleDir, + float startOffset, uint32_t* buf, + int span, vec4 fragCoord = vec4()) { + assert(sampler->format == TextureFormat::RGBA32F); + // Stop offsets are expected to be stored just after the colors. + assert(colorsAddress >= 0 && colorsAddress < offsetsAddress); + assert(offsetsAddress >= 0 && offsetsAddress + (stopCount + 3) / 4 < + int(sampler->height * sampler->stride)); + float* stopOffsets = (float*)&sampler->buf[offsetsAddress]; + Float* stopColors = (Float*)&sampler->buf[colorsAddress]; + + // Number of pixels per chunk. + const float CHUNK_SIZE = 4.0f; + + // Only incremented in the case of dithering + // Only incremented in the case of dithering + int32_t currentFragCoordX = int32_t(fragCoord.x.x); + const auto* ditherNoiseYIndexed = + DITHER ? getDitherNoise(int32_t(fragCoord.y.x)) : nullptr; + + // Get the pixel delta from the difference in offset steps. This represents + // how far within the gradient offset range we advance for every step in + // output. + vec2_scalar posStep = dFdx(pos); + float delta = dot(posStep, scaleDir); + if (!isfinite(delta)) { + return false; + } + + // In order to avoid re-traversing the whole sequence of gradient stops for + // each sub-span when searching for the pair of stops that affect it, we keep + // track of a recent offset+index to start the search from. + int32_t initialIndex = 0; + // This is not the real offset, what matters is that it is lower than lowest + // stop offset (since we start searching at index 0). + float initialOffset = -1.0f; + for (; span > 0;) { + // The number of pixels that are affected by the current gradient stop pair. + float subSpan = span; + + // Compute the gradient offset from the position. 
+ Float offset = pos.x * scaleDir.x + pos.y * scaleDir.y - startOffset; + // If repeat is desired, we need to limit the offset to a fractional value. + if (gradientRepeat) { + offset = fract(offset); + } + + int32_t stopIndex = 0; + float prevOffset = 0.0; + float nextOffset = 0.0; + if (offset.x < 0) { + // If before the start of the gradient stop range, then use the first + // stop. + if (delta > 0) { + subSpan = min(subSpan, -offset.x / delta); + } + } else if (offset.x >= 1) { + // If beyond the end of the gradient stop range, then use the last + // stop. + stopIndex = stopCount - 1; + if (delta < 0) { + subSpan = min(subSpan, (1.0f - offset.x) / delta); + } + } else { + // Otherwise, we're inside the gradient stop range. Find the pair + // that affect the start of the current block and how many blocks + // are affected by the same pair. + stopIndex = + findGradientStopPair(offset.x, stopOffsets, stopCount, initialIndex, + initialOffset, prevOffset, nextOffset); + float offsetRange = + delta > 0.0f ? nextOffset - offset.x : prevOffset - offset.x; + subSpan = min(subSpan, offsetRange / delta); + } + + // Ensure that we advance by at least a pixel. + subSpan = max(ceil(subSpan), 1.0f); + + // Sample the start colors of the gradient stop pair. These are scaled to + // a range of 0..0xFF00, as that is the largest shifted value that can fit + // in a U16. Since we are only doing addition with the step value, we can + // still represent negative step values without having to use an explicit + // sign bit, as the result will still come out the same, allowing us to gain + // an extra bit of precision. We will later shift these into 8 bit output + // range while committing the span, but stepping with higher precision to + // avoid banding. We convert from RGBA to BGRA here to avoid doing this in + // the inner loop. + // The 256 factor is a leftover from a previous version of this code that + // uses a 256 pixels gradient table. 
The math could be simplified to avoid + // it but this change requires careful consideration of its interactions + // with the dithering code. + auto colorScale = (DITHER ? float(0xFF00) : 255.0f) * 256.0f; + auto minColorF = stopColors[stopIndex].zyxw * colorScale; + auto maxColorF = stopColors[stopIndex + 1].zyxw * colorScale; + auto deltaOffset = nextOffset - prevOffset; + // Get the color range of the merged gradient, normalized to its size. + Float colorRangeF = deltaOffset == 0.0f + ? Float(0.0f) + : (maxColorF - minColorF) * (1.0 / deltaOffset); + + // Compute the actual starting color of the current start offset within + // the merged gradient. The value 0.5 is added to the low bits (0x80) so + // that the color will effectively round to the nearest increment below. + auto colorF = + minColorF + colorRangeF * (offset.x - prevOffset) + float(0x80); + + // Compute the portion of the color range that we advance on each chunk. + Float deltaColorF = colorRangeF * delta * CHUNK_SIZE; + // Quantize the color delta and current color. These have already been + // scaled to the 0..0xFF00 range, so we just need to round them to U16. + auto deltaColor = repeat4(CONVERT(round_pixel(deltaColorF, 1), U16)); + // If there are any amount of whole chunks of a merged gradient found, + // then we want to process that as a single gradient span. + int chunks = int(subSpan) / 4; + if (chunks > 0) { + for (int remaining = chunks;;) { + auto color = + combine(CONVERT(round_pixel(colorF, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16)); + // Finally, step the current color through the output chunks, shifting + // it into 8 bit range and outputting as we go. Only process a segment + // at a time to avoid overflowing 8-bit precision due to rounding of + // deltas. 
+ int segment = min(remaining, 256 / 4); + for (auto* end = buf + segment * 4; buf < end; buf += 4) { + if (DITHER) { + commit_blend_span<BLEND>( + buf, + dither(color, currentFragCoordX, ditherNoiseYIndexed) >> 8); + currentFragCoordX += 4; + } else { + commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8)); + } + color += deltaColor; + } + remaining -= segment; + colorF += deltaColorF * segment; + if (remaining <= 0) { + break; + } + } + span -= chunks * 4; + pos += posStep * float(chunks) * CHUNK_SIZE; + } + + // We may have a partial chunk to write. + int remainder = int(subSpan - chunks * 4); + if (remainder > 0) { + assert(remainder < 4); + // The logic here is similar to the full chunks loop above, but we do a + // partial write instead of a pushing a full chunk. + auto color = + combine(CONVERT(round_pixel(colorF, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.25f, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.5f, 1), U16), + CONVERT(round_pixel(colorF + deltaColorF * 0.75f, 1), U16)); + if (DITHER) { + color = dither(color, currentFragCoordX, ditherNoiseYIndexed), + currentFragCoordX += remainder; + } + commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8), remainder); + + buf += remainder; + span -= remainder; + pos += posStep * float(remainder); + } + } + return true; +} + // Commits an entire span of a linear gradient, given the address of a table // previously resolved with swgl_validateGradient. 
The size of the inner portion // of the table is given, assuming the table start and ends with a single entry @@ -1845,6 +2024,46 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size, } \ } while (0) +#define swgl_commitLinearGradientFromStopsRGBA8( \ + sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \ + scaleDir, startOffset) \ + do { \ + bool drawn = false; \ + if (blend_key) { \ + drawn = commitLinearGradientFromStops<true, false>( \ + sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \ + scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength); \ + } else { \ + drawn = commitLinearGradientFromStops<false, false>( \ + sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \ + scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength); \ + } \ + if (drawn) { \ + swgl_OutRGBA8 += swgl_SpanLength; \ + swgl_SpanLength = 0; \ + } \ + } while (0) + +#define swgl_commitDitheredLinearGradientFromStopsRGBA8( \ + sampler, offsetsAddress, colorsAddress, size, tileRepeat, gradientRepeat, \ + pos, scaleDir, startOffset) \ + do { \ + bool drawn = false; \ + if (blend_key) { \ + drawn = commitLinearGradientFromStops<true, true>( \ + sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \ + scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord); \ + } else { \ + drawn = commitLinearGradientFromStops<false, true>( \ + sampler, offsetsAddress, colorsAddress, size, gradientRepeat, pos, \ + scaleDir, startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord); \ + } \ + if (drawn) { \ + swgl_OutRGBA8 += swgl_SpanLength; \ + swgl_SpanLength = 0; \ + } \ + } while (0) + template <bool CLAMP, typename V> static ALWAYS_INLINE V fastSqrt(V v) { if (CLAMP) { @@ -2247,8 +2466,8 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress, // Ensure that we are advancing by at least one pixel at each iteration. 
endT = max(ceil(endT), t + 1.0f); - // Figure out how many pixels belonging to whole chunks are inside the gradient - // stop pair. + // Figure out how many pixels belonging to whole chunks are inside the + // gradient stop pair. int inside = int(endT - t) & ~3; // Convert start and end colors to BGRA and scale to 0..0xFF00 range // (for dithered) and 0.255 range (for non-dithered). @@ -2270,8 +2489,7 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress, : (maxColorF - minColorF) / deltaOffset; // Subtract off the color difference of the beginning of the current span // from the beginning of the gradient. - Float colorF = - minColorF - deltaColorF * (adjustedStartRadius + prevOffset); + Float colorF = minColorF - deltaColorF * (adjustedStartRadius + prevOffset); // Finally, walk over the span accumulating the position dot product and // getting its sqrt as an offset into the color ramp. At this point we just // need to round to an integer and pack down to pixel format. @@ -2333,32 +2551,32 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress, buf += remainder; t += remainder; - // dotPosDelta's members are monotonically increasing, so adjusting the step only - // requires undoing the factor of 4 and multiplying with the actual number of - // remainder pixels. + // dotPosDelta's members are monotonically increasing, so adjusting the + // step only requires undoing the factor of 4 and multiplying with the + // actual number of remainder pixels. float partialDeltaDelta2 = deltaDelta2 * 0.25f * float(remainder); dotPosDelta += partialDeltaDelta2; - // For dotPos, however, there is a compounding effect that makes the math trickier. - // For simplicity's sake we are just computing the the parameters for a single-pixel - // step and applying it remainder times. + // For dotPos, however, there is a compounding effect that makes the math + // trickier. 
For simplicity's sake we are just computing the + // parameters for a single-pixel step and applying it remainder times. - // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did earlier - // when making deltaDelta2 work for 4-pixels chunks). + // The deltaDelta2 for a single-pixel step (undoing the 4*4 factor we did + // earlier when making deltaDelta2 work for 4-pixel chunks). float singlePxDeltaDelta2 = deltaDelta2 * 0.0625f; - // The first single-pixel delta for dotPos (The difference between dotPos's first - // two lanes). + // The first single-pixel delta for dotPos (The difference between + // dotPos's first two lanes). float dotPosDeltaFirst = dotPos.y - dotPos.x; - // For each 1-pixel step the delta is applied and monotonically increased by - // singleDeltaDelta2. + // For each 1-pixel step the delta is applied and monotonically increased + // by singlePxDeltaDelta2. Float pxOffsets = {0.0f, 1.0f, 2.0f, 3.0f}; Float partialDotPosDelta = pxOffsets * singlePxDeltaDelta2 + dotPosDeltaFirst; // Apply each single-pixel step. 
for (int i = 0; i < remainder; ++i) { - dotPos += partialDotPosDelta; - partialDotPosDelta += singlePxDeltaDelta2; + dotPos += partialDotPosDelta; + partialDotPosDelta += singlePxDeltaDelta2; } } } diff --git a/gfx/wr/webrender/res/ps_quad_gradient.glsl b/gfx/wr/webrender/res/ps_quad_gradient.glsl @@ -359,7 +359,8 @@ vec4 pattern_fragment(vec4 color) { #if defined(SWGL_DRAW_SPAN) void swgl_drawSpanRGBA8() { - if (v_gradient_header.x != GRADIENT_KIND_RADIAL) { + int kind = v_gradient_header.x; + if (kind != GRADIENT_KIND_LINEAR && kind != GRADIENT_KIND_RADIAL) { return; } @@ -379,16 +380,30 @@ void swgl_drawSpanRGBA8() { int offsets_addr = colors_addr + stop_count * 4; vec2 pos = v_interpolated_data.xy; - float start_radius = v_flat_data.x; bool repeat = v_gradient_header.z != 0.0; + if (kind == GRADIENT_KIND_LINEAR) { + vec2 scale_dir = v_flat_data.xy; + float start_offset = v_flat_data.z; + #ifdef WR_FEATURE_DITHERING - swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, - stop_count, repeat, pos, start_radius, gl_FragCoord); + swgl_commitDitheredLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, + stop_count, repeat, pos, scale_dir, start_offset, gl_FragCoord); #else - swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, - stop_count, repeat, pos, start_radius); + swgl_commitLinearGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, + stop_count, repeat, pos, scale_dir, start_offset); #endif + } else if (kind == GRADIENT_KIND_RADIAL) { + float start_radius = v_flat_data.x; + +#ifdef WR_FEATURE_DITHERING + swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, + stop_count, repeat, pos, start_radius, gl_FragCoord); +#else + swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr, + stop_count, repeat, pos, start_radius); +#endif + } } #endif diff --git a/gfx/wr/webrender/src/prim_store/gradient/linear.rs 
b/gfx/wr/webrender/src/prim_store/gradient/linear.rs @@ -13,7 +13,7 @@ use euclid::{point2, vec2, size2}; use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU}; use api::units::*; use crate::pattern::{Pattern, PatternBuilder, PatternBuilderContext, PatternBuilderState, PatternKind, PatternShaderInput, PatternTextureInput}; -use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, GradientKind}; +use crate::prim_store::gradient::{gpu_gradient_stops_blocks, write_gpu_gradient_stops_tree, write_gpu_gradient_stops_linear, GradientKind}; use crate::scene_building::IsVisible; use crate::frame_builder::FrameBuildingState; use crate::intern::{Internable, InternDebug, Handle as InternHandle}; @@ -111,6 +111,7 @@ impl PatternBuilder for LinearGradientTemplate { } else { (self.start_point, self.end_point) }; + linear_gradient_pattern( start, end, @@ -794,10 +795,10 @@ pub fn linear_gradient_pattern( end: DevicePoint, extend_mode: ExtendMode, stops: &[GradientStop], - _is_software: bool, + is_software: bool, gpu_buffer_builder: &mut GpuBufferBuilder ) -> Pattern { - let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), true); + let num_blocks = 2 + gpu_gradient_stops_blocks(stops.len(), !is_software); let mut writer = gpu_buffer_builder.f32.write_blocks(num_blocks); writer.push_one([ start.x, @@ -812,7 +813,17 @@ pub fn linear_gradient_pattern( 0.0, ]); - let is_opaque = write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer); + let is_opaque = if is_software { + // The SWGL span shaders for precise gradients can incrementally search + // through the stops (each search starts from where the previous one + // landed). So it is more efficient to store them linearly in this + // configuration. 
+ write_gpu_gradient_stops_linear(stops, GradientKind::Linear, extend_mode, &mut writer) + } else { + // On GPUs, each pixel does its own search so we greatly benefit from + // the tree traversal, especially when there are many stops. + write_gpu_gradient_stops_tree(stops, GradientKind::Linear, extend_mode, &mut writer) + }; let gradient_address = writer.finish(); Pattern {