[ tor-browser ].git.dasho

commit 10aba4a5488f6eb2a7844e249b9ab25ebc1d99dd
parent 46089e005fc3e450457eb4d1401a6f0d661d9160
Author: KS <kshampur@mozilla.com>
Date:   Wed,  5 Nov 2025 20:40:09 +0000

Bug 1968521 - Add subtest alerting for jetstream3 subtests with alert threshold at 5% r=perftest-reviewers,sparky,jandem

This patch enables alerting on the JetStream 3 benchmark. Subtests will
use a 5% alert threshold, while the "overall score" will stay at 2%. As
JS3 is still in active development, 2% could be too noisy for the
subtests, so for the time being 5% will be used.

Additionally, a new "subtest_alert_threshold" attribute is introduced as
a better way to separate alerting for the main score from alerting for
subtests.

Differential Revision: https://phabricator.services.mozilla.com/D271141

Diffstat:
M taskcluster/kinds/browsertime/desktop.yml  | 7 +------
M taskcluster/kinds/browsertime/mobile.yml  | 7 +------
M testing/perfdocs/generated/raptor.rst  | 6 ++++--
M testing/raptor/browsertime/support-scripts/jetstream3.py  | 8 +++++++-
M testing/raptor/raptor/manifest.py  | 4 ++++
M testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml  | 1 +
M testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml  | 1 +

7 files changed, 19 insertions(+), 15 deletions(-)
diff --git a/taskcluster/kinds/browsertime/desktop.yml b/taskcluster/kinds/browsertime/desktop.yml
@@ -457,12 +457,7 @@ browsertime-benchmark:
         by-app:
             firefox:
                 by-subtest:
-                    # Bug 1971608
-                    # For now, keep js3 on tier 3 to minimize potential alerts
-                    # due to now running on autoland.
-                    # Bump to tier 1 or 2 once more development has been done on
-                    # js3.
-                    jetstream3: 3
+                    jetstream3: 2
                     default:
                         by-test-platform:
                             linux.*clang-trunk.*: 2
diff --git a/taskcluster/kinds/browsertime/mobile.yml b/taskcluster/kinds/browsertime/mobile.yml
@@ -518,12 +518,7 @@ browsertime-benchmark-jetstream3:
     treeherder-symbol: Btime()
     tier:
         by-app:
-            # Bug 1971608
-            # For now, keep js3 on tier 3 to minimize potential alerts
-            # due to now running on autoland.
-            # Bump to tier 1 or 2 once more development has been done on
-            # js3.
-            fenix: 3
+            fenix: 2
             default: 3
 
 browsertime-benchmark-unity-webgl-mobile:
diff --git a/testing/perfdocs/generated/raptor.rst b/testing/perfdocs/generated/raptor.rst
@@ -1075,12 +1075,13 @@ Standard benchmarks are third-party tests (i.e. Speedometer) that we have integr
    * **expose browser profiler**: true
    * **expose chrome trace**: true
    * **gecko profile interval**: 1
-   * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml#16>`__
+   * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml#17>`__
    * **lower is better**: false
    * **page cycles**: 1
    * **page timeout**: 2000000
    * **repository**: https://github.com/webkit/jetstream
    * **repository revision**: 0debbb0b94486d4c78162ad5a102279b96dc79d3
+   * **subtest alert threshold**: 5.0
    * **subtest lower is better**: true
    * **subtest unit**: ms
    * **suite name**: JetStream3.0
@@ -1381,13 +1382,14 @@ Standard benchmarks are third-party tests (i.e. Speedometer) that we have integr
    * **expected**: pass
    * **expose browser profiler**: true
    * **gecko profile interval**: 1
-   * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml#21>`__
+   * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml#22>`__
    * **lower is better**: false
    * **page cycles**: 1
    * **page timeout**: 2000000
    * **preferences**: dom.max_script_run_time=0
    * **repository**: https://github.com/webkit/jetstream
    * **repository revision**: 0debbb0b94486d4c78162ad5a102279b96dc79d3
+   * **subtest alert threshold**: 5.0
    * **subtest lower is better**: true
    * **subtest unit**: ms
    * **suite name**: JetStream3.0
diff --git a/testing/raptor/browsertime/support-scripts/jetstream3.py b/testing/raptor/browsertime/support-scripts/jetstream3.py
@@ -46,13 +46,19 @@ class JetStreamSupport(BasePythonSupport):
 
         subtest = {
             "unit": unit,
-            "alertThreshold": float(test.get("alert_threshold", 2.0)),
+            # Bug 1968521 for the time being use 5% for jetstream 3 subtests.
+            "alertThreshold": float(test.get("subtest_alert_threshold", 5.0)),
             "lowerIsBetter": lower_is_better,
             "name": measurement_name,
             "replicates": replicates,
+            "shouldAlert": True,
             "value": round(filters.mean(replicates), 3),
         }
 
+        # Overall score also appears in the subtests payload so just ensure it is 2%
+        if measurement_name == "score":
+            subtest["alertThreshold"] = float(test.get("alert_threshold", 2.0))
+
         return subtest
 
     def summarize_test(self, test, suite, **kwargs):
diff --git a/testing/raptor/raptor/manifest.py b/testing/raptor/raptor/manifest.py
@@ -33,6 +33,7 @@ LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2
 
 required_settings = [
     "alert_threshold",
+    "subtest_alert_threshold",
     "apps",
     "lower_is_better",
     "measure",
@@ -86,6 +87,9 @@ def validate_test_toml(test_details):
             continue
         if setting == "scenario_time" and test_details["type"] != "scenario":
             continue
+        # subtest_alert_threshold is optional
+        if setting == "subtest_alert_threshold":
+            continue
         if test_details.get(setting) is None:
             # if page-cycles is not specified, it's ok as long as browser-cycles is there
             if (
diff --git a/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml b/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml
@@ -1,5 +1,6 @@
 [default]
 alert_threshold = 2.0
+subtest_alert_threshold = 5.0
 apps = "firefox, chrome, safari, safari-tp, custom-car"
 gecko_profile_interval = 1
 expose_browser_profiler = true
diff --git a/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml b/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml
@@ -1,5 +1,6 @@
 [default]
 alert_threshold = 2.0
+subtest_alert_threshold = 5.0
 apps = "chrome-m, cstm-car-m, fenix"
 gecko_profile_interval = 1
 expose_browser_profiler = true

	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE

M	taskcluster/kinds/browsertime/desktop.yml	\|	7	+------
M	taskcluster/kinds/browsertime/mobile.yml	\|	7	+------
M	testing/perfdocs/generated/raptor.rst	\|	6	++++--
M	testing/raptor/browsertime/support-scripts/jetstream3.py	\|	8	+++++++-
M	testing/raptor/raptor/manifest.py	\|	4	++++
M	testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml	\|	1	+
M	testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml	\|	1	+