[ tor-browser ].git.dasho

commit cad38fb97859273974da3ec9b2a9e393b5d73868
parent 49dbde2ae8f9985a4f049823542580bcc29398e9
Author: me <mr.alexander.n.hill@gmail.com>
Date:   Mon,  6 Oct 2025 17:58:46 +0000

Bug 627771 - Implement SWGL fast-path dithering for linear and radial gradients. r=lsalzman

Differential Revision: https://phabricator.services.mozilla.com/D265921

Diffstat:
M gfx/wr/glsl-to-cxx/src/hir.rs  | 24 ++++++++++++++++++++++++
M gfx/wr/swgl/src/swgl_ext.h  | 429 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M gfx/wr/webrender/res/brush_linear_gradient.glsl  | 6 +++++-
M gfx/wr/webrender/res/cs_radial_gradient.glsl  | 8 ++++++--
M gfx/wr/webrender/res/ps_quad_gradient.glsl  | 5 +++++
M gfx/wr/webrender/res/ps_quad_radial_gradient.glsl  | 8 ++++++--

6 files changed, 395 insertions(+), 85 deletions(-)
diff --git a/gfx/wr/glsl-to-cxx/src/hir.rs b/gfx/wr/glsl-to-cxx/src/hir.rs
@@ -3966,6 +3966,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
     );
     declare_function(
         state,
+        "swgl_commitDitheredLinearGradientRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Bool),
+             Type::new(Vec2), Type::new(Vec2), Type::new(Float), Type::new(Vec4)],
+    );
+    declare_function(
+        state,
         "swgl_commitRadialGradientRGBA8",
         None,
         Type::new(Void),
@@ -3974,6 +3982,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
     );
     declare_function(
         state,
+        "swgl_commitDitheredRadialGradientRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Vec2),
+             Type::new(Float), Type::new(Vec4)],
+    );
+    declare_function(
+        state,
         "swgl_commitRadialGradientFromStopsRGBA8",
         None,
         Type::new(Void),
@@ -3982,6 +3998,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
     );
     declare_function(
         state,
+        "swgl_commitDitheredRadialGradientFromStopsRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Vec2),
+             Type::new(Float), Type::new(Vec4)],
+    );
+    declare_function(
+        state,
         "swgl_commitGradientRGBA8",
         None,
         Type::new(Void),
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
@@ -1384,14 +1384,122 @@ static inline WideRGBA8 sampleGradient(sampler2D sampler, int address,
   swgl_commitChunk(RGBA8, applyColor(sampleGradient(sampler, address, entry), \
                                      packColor(swgl_OutRGBA, color)))
 
+// Precomputed noise for adding directly to four horizontally contiguous pixels
+// TODO: These should be updated for parity with the shader dither
+// implementation once something more final exists there. Right now, these are
+// very close but slightly off.
+static const VectorType<int16_t, 16> ditherNoise[64] = {
+    {2, 2, 2, 128, 194, 194, 194, 128, 50, 50, 50, 128, 242, 242, 242, 128},
+    {194, 194, 194, 128, 50, 50, 50, 128, 242, 242, 242, 128, 14, 14, 14, 128},
+    {50, 50, 50, 128, 242, 242, 242, 128, 14, 14, 14, 128, 206, 206, 206, 128},
+    {242, 242, 242, 128, 14, 14, 14, 128, 206, 206, 206, 128, 62, 62, 62, 128},
+    {14, 14, 14, 128, 206, 206, 206, 128, 62, 62, 62, 128, 254, 254, 254, 128},
+    {206, 206, 206, 128, 62, 62, 62, 128, 254, 254, 254, 128, 130, 130, 130,
+     128},
+    {62, 62, 62, 128, 254, 254, 254, 128, 130, 130, 130, 128, 66, 66, 66, 128},
+    {254, 254, 254, 128, 130, 130, 130, 128, 66, 66, 66, 128, 178, 178, 178,
+     128},
+    {130, 130, 130, 128, 66, 66, 66, 128, 178, 178, 178, 128, 114, 114, 114,
+     128},
+    {66, 66, 66, 128, 178, 178, 178, 128, 114, 114, 114, 128, 142, 142, 142,
+     128},
+    {178, 178, 178, 128, 114, 114, 114, 128, 142, 142, 142, 128, 78, 78, 78,
+     128},
+    {114, 114, 114, 128, 142, 142, 142, 128, 78, 78, 78, 128, 190, 190, 190,
+     128},
+    {142, 142, 142, 128, 78, 78, 78, 128, 190, 190, 190, 128, 126, 126, 126,
+     128},
+    {78, 78, 78, 128, 190, 190, 190, 128, 126, 126, 126, 128, 34, 34, 34, 128},
+    {190, 190, 190, 128, 126, 126, 126, 128, 34, 34, 34, 128, 226, 226, 226,
+     128},
+    {126, 126, 126, 128, 34, 34, 34, 128, 226, 226, 226, 128, 18, 18, 18, 128},
+    {34, 34, 34, 128, 226, 226, 226, 128, 18, 18, 18, 128, 210, 210, 210, 128},
+    {226, 226, 226, 128, 18, 18, 18, 128, 210, 210, 210, 128, 46, 46, 46, 128},
+    {18, 18, 18, 128, 210, 210, 210, 128, 46, 46, 46, 128, 238, 238, 238, 128},
+    {210, 210, 210, 128, 46, 46, 46, 128, 238, 238, 238, 128, 30, 30, 30, 128},
+    {46, 46, 46, 128, 238, 238, 238, 128, 30, 30, 30, 128, 222, 222, 222, 128},
+    {238, 238, 238, 128, 30, 30, 30, 128, 222, 222, 222, 128, 162, 162, 162,
+     128},
+    {30, 30, 30, 128, 222, 222, 222, 128, 162, 162, 162, 128, 98, 98, 98, 128},
+    {222, 222, 222, 128, 162, 162, 162, 128, 98, 98, 98, 128, 146, 146, 146,
+     128},
+    {162, 162, 162, 128, 98, 98, 98, 128, 146, 146, 146, 128, 82, 82, 82, 128},
+    {98, 98, 98, 128, 146, 146, 146, 128, 82, 82, 82, 128, 174, 174, 174, 128},
+    {146, 146, 146, 128, 82, 82, 82, 128, 174, 174, 174, 128, 110, 110, 110,
+     128},
+    {82, 82, 82, 128, 174, 174, 174, 128, 110, 110, 110, 128, 158, 158, 158,
+     128},
+    {174, 174, 174, 128, 110, 110, 110, 128, 158, 158, 158, 128, 94, 94, 94,
+     128},
+    {110, 110, 110, 128, 158, 158, 158, 128, 94, 94, 94, 128, 10, 10, 10, 128},
+    {158, 158, 158, 128, 94, 94, 94, 128, 10, 10, 10, 128, 202, 202, 202, 128},
+    {94, 94, 94, 128, 10, 10, 10, 128, 202, 202, 202, 128, 58, 58, 58, 128},
+    {10, 10, 10, 128, 202, 202, 202, 128, 58, 58, 58, 128, 250, 250, 250, 128},
+    {202, 202, 202, 128, 58, 58, 58, 128, 250, 250, 250, 128, 6, 6, 6, 128},
+    {58, 58, 58, 128, 250, 250, 250, 128, 6, 6, 6, 128, 198, 198, 198, 128},
+    {250, 250, 250, 128, 6, 6, 6, 128, 198, 198, 198, 128, 54, 54, 54, 128},
+    {6, 6, 6, 128, 198, 198, 198, 128, 54, 54, 54, 128, 246, 246, 246, 128},
+    {198, 198, 198, 128, 54, 54, 54, 128, 246, 246, 246, 128, 138, 138, 138,
+     128},
+    {54, 54, 54, 128, 246, 246, 246, 128, 138, 138, 138, 128, 74, 74, 74, 128},
+    {246, 246, 246, 128, 138, 138, 138, 128, 74, 74, 74, 128, 186, 186, 186,
+     128},
+    {138, 138, 138, 128, 74, 74, 74, 128, 186, 186, 186, 128, 122, 122, 122,
+     128},
+    {74, 74, 74, 128, 186, 186, 186, 128, 122, 122, 122, 128, 134, 134, 134,
+     128},
+    {186, 186, 186, 128, 122, 122, 122, 128, 134, 134, 134, 128, 70, 70, 70,
+     128},
+    {122, 122, 122, 128, 134, 134, 134, 128, 70, 70, 70, 128, 182, 182, 182,
+     128},
+    {134, 134, 134, 128, 70, 70, 70, 128, 182, 182, 182, 128, 118, 118, 118,
+     128},
+    {70, 70, 70, 128, 182, 182, 182, 128, 118, 118, 118, 128, 42, 42, 42, 128},
+    {182, 182, 182, 128, 118, 118, 118, 128, 42, 42, 42, 128, 234, 234, 234,
+     128},
+    {118, 118, 118, 128, 42, 42, 42, 128, 234, 234, 234, 128, 26, 26, 26, 128},
+    {42, 42, 42, 128, 234, 234, 234, 128, 26, 26, 26, 128, 218, 218, 218, 128},
+    {234, 234, 234, 128, 26, 26, 26, 128, 218, 218, 218, 128, 38, 38, 38, 128},
+    {26, 26, 26, 128, 218, 218, 218, 128, 38, 38, 38, 128, 230, 230, 230, 128},
+    {218, 218, 218, 128, 38, 38, 38, 128, 230, 230, 230, 128, 22, 22, 22, 128},
+    {38, 38, 38, 128, 230, 230, 230, 128, 22, 22, 22, 128, 214, 214, 214, 128},
+    {230, 230, 230, 128, 22, 22, 22, 128, 214, 214, 214, 128, 170, 170, 170,
+     128},
+    {22, 22, 22, 128, 214, 214, 214, 128, 170, 170, 170, 128, 106, 106, 106,
+     128},
+    {214, 214, 214, 128, 170, 170, 170, 128, 106, 106, 106, 128, 154, 154, 154,
+     128},
+    {170, 170, 170, 128, 106, 106, 106, 128, 154, 154, 154, 128, 90, 90, 90,
+     128},
+    {106, 106, 106, 128, 154, 154, 154, 128, 90, 90, 90, 128, 166, 166, 166,
+     128},
+    {154, 154, 154, 128, 90, 90, 90, 128, 166, 166, 166, 128, 102, 102, 102,
+     128},
+    {90, 90, 90, 128, 166, 166, 166, 128, 102, 102, 102, 128, 150, 150, 150,
+     128},
+    {166, 166, 166, 128, 102, 102, 102, 128, 150, 150, 150, 128, 86, 86, 86,
+     128},
+    {102, 102, 102, 128, 150, 150, 150, 128, 86, 86, 86, 128, 2, 2, 2, 128},
+    {150, 150, 150, 128, 86, 86, 86, 128, 2, 2, 2, 128, 194, 194, 194, 128},
+    {86, 86, 86, 128, 2, 2, 2, 128, 194, 194, 194, 128, 50, 50, 50, 128}};
+
+// Values in color should be in the 0x00FF..0xFF00 range so that dithering has
+// enough overhead to avoid overflow and underflow.
+static inline VectorType<uint16_t, 4 * 4> dither(
+    VectorType<uint16_t, 4 * 4> color, ivec4_scalar fragCoord,
+    const VectorType<int16_t, 16>* ditherNoiseYIndexed) {
+  color += ditherNoiseYIndexed[fragCoord.x & 7];
+  return color;
+}
+
 // Samples an entire span of a linear gradient by crawling the gradient table
 // and looking for consecutive stops that can be merged into a single larger
 // gradient, then interpolating between those larger gradients within the span.
-template <bool BLEND>
+template <bool BLEND, bool DITHER>
 static bool commitLinearGradient(sampler2D sampler, int address, float size,
                                  bool tileRepeat, bool gradientRepeat, vec2 pos,
                                  const vec2_scalar& scaleDir, float startOffset,
-                                 uint32_t* buf, int span) {
+                                 uint32_t* buf, int span, vec4 fragCoord) {
   assert(sampler->format == TextureFormat::RGBA32F);
   assert(address >= 0 && address < int(sampler->height * sampler->stride));
   GradientStops* stops = (GradientStops*)&sampler->buf[address];
@@ -1403,6 +1511,16 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
   if (!isfinite(delta)) {
     return false;
   }
+
+  // Only incremented in the case of dithering
+  ivec4_scalar currentFragCoord =
+      ivec4_scalar(fragCoord.x.x, fragCoord.y.x, fragCoord.z.x, fragCoord.w.x);
+
+  const VectorType<int16_t, 16>* ditherNoiseYIndexed;
+  if (DITHER) {
+    ditherNoiseYIndexed = &ditherNoise[((uint32_t)fragCoord.y.x & 7) * 8];
+  }
+
   // If we have a repeating brush, then the position will be modulo the [0,1)
   // interval. Compute coefficients that can be used to quickly evaluate the
   // distance to the interval boundary where the offset will wrap.
@@ -1501,17 +1619,20 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
     if (chunks >= 1.0f) {
       int inside = int(chunks);
       // Sample the start color from the min entry and the end color from the
-      // max entry of the merged gradient. These are scaled to a range of
-      // 0..0xFF00, as that is the largest shifted value that can fit in a U16.
-      // Since we are only doing addition with the step value, we can still
-      // represent negative step values without having to use an explicit sign
-      // bit, as the result will still come out the same, allowing us to gain an
-      // extra bit of precision. We will later shift these into 8 bit output
-      // range while committing the span, but stepping with higher precision to
-      // avoid banding. We convert from RGBA to BGRA here to avoid doing this in
-      // the inner loop.
+      // max entry of the merged gradient. For non-dithered, these are scaled to
+      // a range of 0..0xFF00, as that is the largest shifted value that can fit
+      // in a U16. For dithered, these are scaled to a range of 0x00FF-0xFF00 to
+      // allow room to avoid overflow and underflow when applying the dither
+      // pattern. Since we are only doing addition with the step value, we can
+      // still represent negative step values without having to use an explicit
+      // sign bit, as the result will still come out the same, allowing us to
+      // gain an extra bit of precision. We will later shift these into 8 bit
+      // output range while committing the span, but stepping with higher
+      // precision to avoid banding. We convert from RGBA to BGRA here to avoid
+      // doing this in the inner loop.
       auto minColorF = stops[minIndex].startColor.zyxw * float(0xFF00);
       auto maxColorF = stops[maxIndex].end_color().zyxw * float(0xFF00);
+
       // Get the color range of the merged gradient, normalized to its size.
       auto colorRangeF =
           (maxColorF - minColorF) * (1.0f / (maxIndex + 1 - minIndex));
@@ -1537,7 +1658,15 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
         // deltas.
         int segment = min(remaining, 256 / 4);
         for (auto* end = buf + segment * 4; buf < end; buf += 4) {
-          commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8));
+          if (DITHER) {
+            commit_blend_span<BLEND>(
+                buf,
+                bit_cast<WideRGBA8>(
+                    dither(color, currentFragCoord, ditherNoiseYIndexed) >> 8));
+            currentFragCoord.x += 4;
+          } else {
+            commit_blend_span<BLEND>(buf, bit_cast<WideRGBA8>(color >> 8));
+          }
           color += deltaColor;
         }
         remaining -= segment;
@@ -1570,7 +1699,19 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
     // will calculate a table entry for each sample, assuming the samples may
     // have different table entries.
     Float entry = clamp(offset * size + 1.0f, 0.0f, 1.0f + size);
-    commit_blend_span<BLEND>(buf, sampleGradient(sampler, address, entry));
+    if (DITHER) {
+      auto gradientSample = static_cast<VectorType<uint16_t, 4 * 4>>(
+                                sampleGradient(sampler, address, entry))
+                            << 8;
+      commit_blend_span<BLEND>(
+          buf, static_cast<WideRGBA8>(dither(gradientSample, currentFragCoord,
+                                             ditherNoiseYIndexed) >>
+                                      8));
+      currentFragCoord.x += 4;
+    } else {
+      commit_blend_span<BLEND>(
+          buf, static_cast<WideRGBA8>(sampleGradient(sampler, address, entry)));
+    }
     span -= 4;
     buf += 4;
     pos += posStep;
@@ -1590,13 +1731,35 @@ static bool commitLinearGradient(sampler2D sampler, int address, float size,
   do {                                                                       \
     bool drawn = false;                                                      \
     if (blend_key) {                                                         \
-      drawn = commitLinearGradient<true>(                                    \
+      drawn = commitLinearGradient<true, false>(                             \
+          sampler, address, size, tileRepeat, gradientRepeat, pos, scaleDir, \
+          startOffset, swgl_OutRGBA8, swgl_SpanLength,                       \
+          static_cast<vec4>(0x0));                                           \
+    } else {                                                                 \
+      drawn = commitLinearGradient<false, false>(                            \
+          sampler, address, size, tileRepeat, gradientRepeat, pos, scaleDir, \
+          startOffset, swgl_OutRGBA8, swgl_SpanLength,                       \
+          static_cast<vec4>(0x0));                                           \
+    }                                                                        \
+    if (drawn) {                                                             \
+      swgl_OutRGBA8 += swgl_SpanLength;                                      \
+      swgl_SpanLength = 0;                                                   \
+    }                                                                        \
+  } while (0)
+
+#define swgl_commitDitheredLinearGradientRGBA8(                              \
+    sampler, address, size, tileRepeat, gradientRepeat, pos, scaleDir,       \
+    startOffset, fragCoord)                                                  \
+  do {                                                                       \
+    bool drawn = false;                                                      \
+    if (blend_key) {                                                         \
+      drawn = commitLinearGradient<true, true>(                              \
           sampler, address, size, tileRepeat, gradientRepeat, pos, scaleDir, \
-          startOffset, swgl_OutRGBA8, swgl_SpanLength);                      \
+          startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord);           \
     } else {                                                                 \
-      drawn = commitLinearGradient<false>(                                   \
+      drawn = commitLinearGradient<false, true>(                             \
           sampler, address, size, tileRepeat, gradientRepeat, pos, scaleDir, \
-          startOffset, swgl_OutRGBA8, swgl_SpanLength);                      \
+          startOffset, swgl_OutRGBA8, swgl_SpanLength, fragCoord);           \
     }                                                                        \
     if (drawn) {                                                             \
       swgl_OutRGBA8 += swgl_SpanLength;                                      \
@@ -1626,10 +1789,10 @@ static ALWAYS_INLINE auto fastLength(V v) {
 // and looking for consecutive stops that can be merged into a single larger
 // gradient, then interpolating between those larger gradients within the span
 // based on the computed position relative to a radius.
-template <bool BLEND>
+template <bool BLEND, bool DITHER>
 static bool commitRadialGradient(sampler2D sampler, int address, float size,
                                  bool repeat, vec2 pos, float radius,
-                                 uint32_t* buf, int span) {
+                                 uint32_t* buf, int span, vec4 fragCoord) {
   assert(sampler->format == TextureFormat::RGBA32F);
   assert(address >= 0 && address < int(sampler->height * sampler->stride));
   GradientStops* stops = (GradientStops*)&sampler->buf[address];
@@ -1659,6 +1822,16 @@ static bool commitRadialGradient(sampler2D sampler, int address, float size,
   if (!isfinite(deltaDelta) || !isfinite(radius)) {
     return false;
   }
+
+  // Only incremented in the case of dithering
+  ivec4_scalar currentFragCoord =
+      ivec4_scalar(fragCoord.x.x, fragCoord.y.x, fragCoord.z.x, fragCoord.w.x);
+
+  const VectorType<int16_t, 16>* ditherNoiseYIndexed;
+  if (DITHER) {
+    ditherNoiseYIndexed = &ditherNoise[((uint32_t)fragCoord.y.x & 7) * 8];
+  }
+
   float invDelta, middleT, middleB;
   if (deltaDelta > 0) {
     invDelta = 1.0f / deltaDelta;
@@ -1779,9 +1952,13 @@ static bool commitRadialGradient(sampler2D sampler, int address, float size,
     // Figure out how many chunks are actually inside the merged gradient.
     if (t + 4.0f <= endT) {
       int inside = int(endT - t) & ~3;
-      // Convert start and end colors to BGRA and scale to 0..255 range later.
-      auto minColorF = stops[minIndex].startColor.zyxw * 255.0f;
-      auto maxColorF = stops[maxIndex].end_color().zyxw * 255.0f;
+      // Convert start and end colors to BGRA and scale to 0x00FF..0xFF00 range
+      // (for dithered) or 0..255 (for non-dithered) later.
+      auto minColorF =
+          stops[minIndex].startColor.zyxw * (DITHER ? float(0xFF00) : 255.0f);
+      auto maxColorF =
+          stops[maxIndex].end_color().zyxw * (DITHER ? float(0xFF00) : 255.0f);
+
       // Compute the change in color per change in gradient offset.
       auto deltaColorF =
           (maxColorF - minColorF) * (size / (maxIndex + 1 - minIndex));
@@ -1790,18 +1967,30 @@ static bool commitRadialGradient(sampler2D sampler, int address, float size,
       Float colorF =
           minColorF - deltaColorF * (startRadius + (minIndex - 1) / size);
       // Finally, walk over the span accumulating the position dot product and
-      // getting its sqrt as an offset into the color ramp. Since we're already
-      // in BGRA format and scaled to 255, we just need to round to an integer
-      // and pack down to pixel format.
+      // getting its sqrt as an offset into the color ramp. At this point we
+      // just need to round to an integer and pack down to pixel format.
       for (auto* end = buf + inside; buf < end; buf += 4) {
         Float offsetG = fastSqrt<false>(dotPos);
-        commit_blend_span<BLEND>(
-            buf,
-            combine(
-                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
-                          round_pixel(colorF + deltaColorF * offsetG.y, 1)),
-                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
-                          round_pixel(colorF + deltaColorF * offsetG.w, 1))));
+        if (DITHER) {
+          auto color = combine(
+              CONVERT(round_pixel(colorF + deltaColorF * offsetG.x, 1), U16),
+              CONVERT(round_pixel(colorF + deltaColorF * offsetG.y, 1), U16),
+              CONVERT(round_pixel(colorF + deltaColorF * offsetG.z, 1), U16),
+              CONVERT(round_pixel(colorF + deltaColorF * offsetG.w, 1), U16));
+          commit_blend_span<BLEND>(
+              buf,
+              static_cast<WideRGBA8>(
+                  dither(color, currentFragCoord, ditherNoiseYIndexed) >> 8));
+          currentFragCoord.x += 4;
+        } else {
+          auto color = combine(
+              packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
+                        round_pixel(colorF + deltaColorF * offsetG.y, 1)),
+              packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
+                        round_pixel(colorF + deltaColorF * offsetG.w, 1)));
+          commit_blend_span<BLEND>(buf, color);
+        }
+
         dotPos += dotPosDelta;
         dotPosDelta += deltaDelta2;
       }
@@ -1908,11 +2097,12 @@ static int32_t findGradientStopPair(float offset, float* stops,
 }
 
 // Samples an entire span of a radial gradient.
-template <bool BLEND>
+template <bool BLEND, bool DITHER>
 static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
                                           int colorsAddress, float stopCount,
-                                          bool repeat, vec2 pos, float startRadius,
-                                          uint32_t* buf, int span) {
+                                          bool repeat, vec2 pos,
+                                          float startRadius, uint32_t* buf,
+                                          int span, vec4 fragCoord) {
   assert(sampler->format == TextureFormat::RGBA32F);
   // Stop offsets are expected to be stored just after the colors.
   assert(colorsAddress >= 0 && colorsAddress < offsetsAddress);
@@ -1946,6 +2136,16 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
   if (!isfinite(deltaDelta) || !isfinite(startRadius)) {
     return false;
   }
+
+  // Only incremented in the case of dithering
+  ivec4_scalar currentFragCoord =
+      ivec4_scalar(fragCoord.x.x, fragCoord.y.x, fragCoord.z.x, fragCoord.w.x);
+
+  const VectorType<int16_t, 16>* ditherNoiseYIndexed;
+  if (DITHER) {
+    ditherNoiseYIndexed = &ditherNoise[((uint32_t)fragCoord.y.x & 7) * 8];
+  }
+
   float invDelta, middleT, middleB;
   if (deltaDelta > 0) {
     invDelta = 1.0f / deltaDelta;
@@ -2054,9 +2254,13 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
     // Figure out how many pixels belonging to whole chunks are inside the gradient
     // stop pair.
     int inside = int(endT - t) & ~3;
-    // Convert start and end colors to BGRA and scale to 0..255 range.
-    auto minColorF = stopColors[stopIndex].zyxw * 255.0f;
-    auto maxColorF = stopColors[stopIndex + 1].zyxw * 255.0f;
+    // Convert start and end colors to BGRA and scale to 0x00FF..0xFF00 range
+    // (for dithered) and 0.255 range (for non-dithered).
+    auto minColorF =
+        stopColors[stopIndex].zyxw * (DITHER ? float(0xFF00) : 255.0f);
+    auto maxColorF =
+        stopColors[stopIndex + 1].zyxw * (DITHER ? float(0xFF00) : 255.0f);
+
     // Compute the change in color per change in gradient offset.
     auto deltaOffset = nextOffset - prevOffset;
     Float deltaColorF =
@@ -2073,17 +2277,29 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
     Float colorF =
         minColorF - deltaColorF * (adjustedStartRadius + prevOffset);
     // Finally, walk over the span accumulating the position dot product and
-    // getting its sqrt as an offset into the color ramp. Since we're already
-    // in BGRA format and scaled to 255, we just need to round to an integer
-    // and pack down to pixel format.
+    // getting its sqrt as an offset into the color ramp. At this point we just
+    // need to round to an integer and pack down to pixel format.
     for (auto* end = buf + inside; buf < end; buf += 4) {
       Float offsetG = fastSqrt<false>(dotPos);
-      commit_blend_span<BLEND>(
-          buf,
-          combine(packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
-                            round_pixel(colorF + deltaColorF * offsetG.y, 1)),
-                  packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
-                            round_pixel(colorF + deltaColorF * offsetG.w, 1))));
+      if (DITHER) {
+        auto color = combine(
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.x, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.y, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.z, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.w, 1), U16));
+        commit_blend_span<BLEND>(
+            buf,
+            static_cast<WideRGBA8>(
+                dither(color, currentFragCoord, ditherNoiseYIndexed) >> 8));
+        currentFragCoord.x += 4;
+      } else {
+        auto color = combine(
+            packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
+                      round_pixel(colorF + deltaColorF * offsetG.y, 1)),
+            packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
+                      round_pixel(colorF + deltaColorF * offsetG.w, 1)));
+        commit_blend_span<BLEND>(buf, color);
+      }
       dotPos += dotPosDelta;
       dotPosDelta += deltaDelta2;
     }
@@ -2102,14 +2318,26 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
       // The logic here is similar to the full chunks loop above, but we do a
       // partial write instead of a pushing a full chunk.
       Float offsetG = fastSqrt<false>(dotPos);
-      commit_blend_span<BLEND>(
-          buf,
-          combine(packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
-                            round_pixel(colorF + deltaColorF * offsetG.y, 1)),
-                  packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
-                            round_pixel(colorF + deltaColorF * offsetG.w, 1))),
-          remainder);
-
+      if (DITHER) {
+        auto color = combine(
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.x, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.y, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.z, 1), U16),
+            CONVERT(round_pixel(colorF + deltaColorF * offsetG.w, 1), U16));
+        commit_blend_span<BLEND>(
+            buf,
+            static_cast<WideRGBA8>(
+                dither(color, currentFragCoord, ditherNoiseYIndexed) >> 8),
+            remainder);
+        currentFragCoord.x += 4;
+      } else {
+        auto color = combine(
+            packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
+                      round_pixel(colorF + deltaColorF * offsetG.y, 1)),
+            packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
+                      round_pixel(colorF + deltaColorF * offsetG.w, 1)));
+        commit_blend_span<BLEND>(buf, color, remainder);
+      }
       buf += remainder;
       t += remainder;
 
@@ -2149,18 +2377,37 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
 // swglcommitLinearGradient, but given a varying 2D position scaled to
 // gradient-space and a radius at which the distance from the origin maps to the
 // start of the gradient table.
-#define swgl_commitRadialGradientRGBA8(sampler, address, size, repeat, pos,    \
-                                       radius)                                 \
+#define swgl_commitRadialGradientRGBA8(sampler, address, size, repeat, pos, \
+                                       radius)                              \
+  do {                                                                      \
+    bool drawn = false;                                                     \
+    if (blend_key) {                                                        \
+      drawn = commitRadialGradient<true, false>(                            \
+          sampler, address, size, repeat, pos, radius, swgl_OutRGBA8,       \
+          swgl_SpanLength, static_cast<vec4>(0x0));                         \
+    } else {                                                                \
+      drawn = commitRadialGradient<false, false>(                           \
+          sampler, address, size, repeat, pos, radius, swgl_OutRGBA8,       \
+          swgl_SpanLength, static_cast<vec4>(0x0));                         \
+    }                                                                       \
+    if (drawn) {                                                            \
+      swgl_OutRGBA8 += swgl_SpanLength;                                     \
+      swgl_SpanLength = 0;                                                  \
+    }                                                                       \
+  } while (0)
+
+#define swgl_commitDitheredRadialGradientRGBA8(sampler, address, size, repeat, \
+                                               pos, radius, fragCoord)         \
   do {                                                                         \
     bool drawn = false;                                                        \
     if (blend_key) {                                                           \
-      drawn =                                                                  \
-          commitRadialGradient<true>(sampler, address, size, repeat, pos,      \
-                                     radius, swgl_OutRGBA8, swgl_SpanLength);  \
+      drawn = commitRadialGradient<true, true>(sampler, address, size, repeat, \
+                                               pos, radius, swgl_OutRGBA8,     \
+                                               swgl_SpanLength, fragCoord);    \
     } else {                                                                   \
-      drawn =                                                                  \
-          commitRadialGradient<false>(sampler, address, size, repeat, pos,     \
-                                      radius, swgl_OutRGBA8, swgl_SpanLength); \
+      drawn = commitRadialGradient<false, true>(                               \
+          sampler, address, size, repeat, pos, radius, swgl_OutRGBA8,          \
+          swgl_SpanLength, fragCoord);                                         \
     }                                                                          \
     if (drawn) {                                                               \
       swgl_OutRGBA8 += swgl_SpanLength;                                        \
@@ -2172,23 +2419,45 @@ static bool commitRadialGradientFromStops(sampler2D sampler, int offsetsAddress,
 // swglcommitLinearGradient, but given a varying 2D position scaled to
 // gradient-space and a radius at which the distance from the origin maps to the
 // start of the gradient table.
-#define swgl_commitRadialGradientFromStopsRGBA8(                             \
-    sampler, offsetsAddress, colorsAddress, size, repeat, pos, startRadius)  \
-  do {                                                                       \
-    bool drawn = false;                                                      \
-    if (blend_key) {                                                         \
-      drawn = commitRadialGradientFromStops<true>(                           \
-          sampler, offsetsAddress, colorsAddress, size, repeat, pos, startRadius, \
-          swgl_OutRGBA8, swgl_SpanLength);                                   \
-    } else {                                                                 \
-      drawn = commitRadialGradientFromStops<false>(                          \
-          sampler, offsetsAddress, colorsAddress, size, repeat, pos, startRadius, \
-          swgl_OutRGBA8, swgl_SpanLength);                                   \
-    }                                                                        \
-    if (drawn) {                                                             \
-      swgl_OutRGBA8 += swgl_SpanLength;                                      \
-      swgl_SpanLength = 0;                                                   \
-    }                                                                        \
+#define swgl_commitRadialGradientFromStopsRGBA8(                            \
+    sampler, offsetsAddress, colorsAddress, size, repeat, pos, startRadius) \
+  do {                                                                      \
+    bool drawn = false;                                                     \
+    if (blend_key) {                                                        \
+      drawn = commitRadialGradientFromStops<true, false>(                   \
+          sampler, offsetsAddress, colorsAddress, size, repeat, pos,        \
+          startRadius, swgl_OutRGBA8, swgl_SpanLength,                      \
+          static_cast<vec4>(0x0));                                          \
+    } else {                                                                \
+      drawn = commitRadialGradientFromStops<false, false>(                  \
+          sampler, offsetsAddress, colorsAddress, size, repeat, pos,        \
+          startRadius, swgl_OutRGBA8, swgl_SpanLength,                      \
+          static_cast<vec4>(0x0));                                          \
+    }                                                                       \
+    if (drawn) {                                                            \
+      swgl_OutRGBA8 += swgl_SpanLength;                                     \
+      swgl_SpanLength = 0;                                                  \
+    }                                                                       \
+  } while (0)
+
+#define swgl_commitDitheredRadialGradientFromStopsRGBA8(                    \
+    sampler, offsetsAddress, colorsAddress, size, repeat, pos, startRadius, \
+    fragCoord)                                                              \
+  do {                                                                      \
+    bool drawn = false;                                                     \
+    if (blend_key) {                                                        \
+      drawn = commitRadialGradientFromStops<true, true>(                    \
+          sampler, offsetsAddress, colorsAddress, size, repeat, pos,        \
+          startRadius, swgl_OutRGBA8, swgl_SpanLength, fragCoord);          \
+    } else {                                                                \
+      drawn = commitRadialGradientFromStops<false, true>(                   \
+          sampler, offsetsAddress, colorsAddress, size, repeat, pos,        \
+          startRadius, swgl_OutRGBA8, swgl_SpanLength, fragCoord);          \
+    }                                                                       \
+    if (drawn) {                                                            \
+      swgl_OutRGBA8 += swgl_SpanLength;                                     \
+      swgl_SpanLength = 0;                                                  \
+    }                                                                       \
   } while (0)
 
 // Extension to set a clip mask image to be sampled during blending. The offset
diff --git a/gfx/wr/webrender/res/brush_linear_gradient.glsl b/gfx/wr/webrender/res/brush_linear_gradient.glsl
@@ -86,9 +86,13 @@ void swgl_drawSpanRGBA8() {
     if (address < 0) {
         return;
     }
-
+#ifdef WR_FEATURE_DITHERING
+    swgl_commitDitheredLinearGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, true, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_scale_dir, v_start_offset.x, gl_FragCoord);
+#else
     swgl_commitLinearGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, true, v_gradient_repeat.x != 0.0,
                                    v_pos, v_scale_dir, v_start_offset.x);
+#endif
 }
 #endif
 
diff --git a/gfx/wr/webrender/res/cs_radial_gradient.glsl b/gfx/wr/webrender/res/cs_radial_gradient.glsl
@@ -63,8 +63,12 @@ void swgl_drawSpanRGBA8() {
     if (address < 0) {
         return;
     }
-    swgl_commitRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
-                                   v_pos, v_start_radius.x);
+#ifdef WR_FEATURE_DITHERING
+    swgl_commitDitheredRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_start_radius.x, gl_FragCoord);
+#else
+    swgl_commitRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0, v_pos, v_start_radius.x);
+#endif
 }
 #endif
 
diff --git a/gfx/wr/webrender/res/ps_quad_gradient.glsl b/gfx/wr/webrender/res/ps_quad_gradient.glsl
@@ -383,8 +383,13 @@ void swgl_drawSpanRGBA8() {
     float start_radius = v_flat_data.x;
     bool repeat = v_gradient_header.z != 0.0;
 
+#ifdef WR_FEATURE_DITHERING
+    swgl_commitDitheredRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
+                                            stop_count, repeat, pos, start_radius, gl_FragCoord);
+#else
     swgl_commitRadialGradientFromStopsRGBA8(sGpuBufferF, offsets_addr, colors_addr,
                                             stop_count, repeat, pos, start_radius);
+#endif
 }
 #endif
 
diff --git a/gfx/wr/webrender/res/ps_quad_radial_gradient.glsl b/gfx/wr/webrender/res/ps_quad_radial_gradient.glsl
@@ -73,8 +73,12 @@ void swgl_drawSpanRGBA8() {
     if (address < 0) {
         return;
     }
-    swgl_commitRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
-                                   v_pos, v_start_radius.x);
+#ifdef WR_FEATURE_DITHERING
+    swgl_commitDitheredRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_start_radius.x, gl_FragCoord);
+#else
+    swgl_commitRadialGradientRGBA8(sGpuBufferF, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0, v_pos, v_start_radius.x); 
+#endif
 }
 #endif

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	gfx/wr/glsl-to-cxx/src/hir.rs	\|	24	++++++++++++++++++++++++
M	gfx/wr/swgl/src/swgl_ext.h	\|	429	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M	gfx/wr/webrender/res/brush_linear_gradient.glsl	\|	6	+++++-
M	gfx/wr/webrender/res/cs_radial_gradient.glsl	\|	8	++++++--
M	gfx/wr/webrender/res/ps_quad_gradient.glsl	\|	5	+++++
M	gfx/wr/webrender/res/ps_quad_radial_gradient.glsl	\|	8	++++++--