tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 10aba4a5488f6eb2a7844e249b9ab25ebc1d99dd
parent 46089e005fc3e450457eb4d1401a6f0d661d9160
Author: KS <kshampur@mozilla.com>
Date:   Wed,  5 Nov 2025 20:40:09 +0000

Bug 1968521 - Add subtest alerting for jetstream3 subtests with alert threshold at 5% r=perftest-reviewers,sparky,jandem

This patch enables alerting on the JetStream 3 benchmark. Subtests will
use a 5% threshold, while the "overall score" stays at 2%. Because JS3
is still in active development, 2% could be too noisy for the subtests,
so 5% will be used for the time being.

Additionally, a new "subtest_alert_threshold" attribute is introduced as
a better way to separate alerting for the main score from alerting for subtests.

Differential Revision: https://phabricator.services.mozilla.com/D271141

Diffstat:
Mtaskcluster/kinds/browsertime/desktop.yml | 7+------
Mtaskcluster/kinds/browsertime/mobile.yml | 7+------
Mtesting/perfdocs/generated/raptor.rst | 6++++--
Mtesting/raptor/browsertime/support-scripts/jetstream3.py | 8+++++++-
Mtesting/raptor/raptor/manifest.py | 4++++
Mtesting/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml | 1+
Mtesting/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml | 1+
7 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/taskcluster/kinds/browsertime/desktop.yml b/taskcluster/kinds/browsertime/desktop.yml @@ -457,12 +457,7 @@ browsertime-benchmark: by-app: firefox: by-subtest: - # Bug 1971608 - # For now, keep js3 on tier 3 to minimize potential alerts - # due to now running on autoland. - # Bump to tier 1 or 2 once more development has been done on - # js3. - jetstream3: 3 + jetstream3: 2 default: by-test-platform: linux.*clang-trunk.*: 2 diff --git a/taskcluster/kinds/browsertime/mobile.yml b/taskcluster/kinds/browsertime/mobile.yml @@ -518,12 +518,7 @@ browsertime-benchmark-jetstream3: treeherder-symbol: Btime() tier: by-app: - # Bug 1971608 - # For now, keep js3 on tier 3 to minimize potential alerts - # due to now running on autoland. - # Bump to tier 1 or 2 once more development has been done on - # js3. - fenix: 3 + fenix: 2 default: 3 browsertime-benchmark-unity-webgl-mobile: diff --git a/testing/perfdocs/generated/raptor.rst b/testing/perfdocs/generated/raptor.rst @@ -1075,12 +1075,13 @@ Standard benchmarks are third-party tests (i.e. Speedometer) that we have integr * **expose browser profiler**: true * **expose chrome trace**: true * **gecko profile interval**: 1 - * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml#16>`__ + * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml#17>`__ * **lower is better**: false * **page cycles**: 1 * **page timeout**: 2000000 * **repository**: https://github.com/webkit/jetstream * **repository revision**: 0debbb0b94486d4c78162ad5a102279b96dc79d3 + * **subtest alert threshold**: 5.0 * **subtest lower is better**: true * **subtest unit**: ms * **suite name**: JetStream3.0 @@ -1381,13 +1382,14 @@ Standard benchmarks are third-party tests (i.e. 
Speedometer) that we have integr * **expected**: pass * **expose browser profiler**: true * **gecko profile interval**: 1 - * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml#21>`__ + * **link searchfox**: `<https://searchfox.org/mozilla-central/source/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml#22>`__ * **lower is better**: false * **page cycles**: 1 * **page timeout**: 2000000 * **preferences**: dom.max_script_run_time=0 * **repository**: https://github.com/webkit/jetstream * **repository revision**: 0debbb0b94486d4c78162ad5a102279b96dc79d3 + * **subtest alert threshold**: 5.0 * **subtest lower is better**: true * **subtest unit**: ms * **suite name**: JetStream3.0 diff --git a/testing/raptor/browsertime/support-scripts/jetstream3.py b/testing/raptor/browsertime/support-scripts/jetstream3.py @@ -46,13 +46,19 @@ class JetStreamSupport(BasePythonSupport): subtest = { "unit": unit, - "alertThreshold": float(test.get("alert_threshold", 2.0)), + # Bug 1968521 for the time being use 5% for jetstream 3 subtests. 
+ "alertThreshold": float(test.get("subtest_alert_threshold", 5.0)), "lowerIsBetter": lower_is_better, "name": measurement_name, "replicates": replicates, + "shouldAlert": True, "value": round(filters.mean(replicates), 3), } + # Overall score also appears in the subtests payload so just ensure it is 2% + if measurement_name == "score": + subtest["alertThreshold"] = float(test.get("alert_threshold", 2.0)) + return subtest def summarize_test(self, test, suite, **kwargs): diff --git a/testing/raptor/raptor/manifest.py b/testing/raptor/raptor/manifest.py @@ -33,6 +33,7 @@ LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2 required_settings = [ "alert_threshold", + "subtest_alert_threshold", "apps", "lower_is_better", "measure", @@ -86,6 +87,9 @@ def validate_test_toml(test_details): continue if setting == "scenario_time" and test_details["type"] != "scenario": continue + # subtest_alert_threshold is optional + if setting == "subtest_alert_threshold": + continue if test_details.get(setting) is None: # if page-cycles is not specified, it's ok as long as browser-cycles is there if ( diff --git a/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml b/testing/raptor/raptor/tests/benchmarks/jetstream3-desktop.toml @@ -1,5 +1,6 @@ [default] alert_threshold = 2.0 +subtest_alert_threshold = 5.0 apps = "firefox, chrome, safari, safari-tp, custom-car" gecko_profile_interval = 1 expose_browser_profiler = true diff --git a/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml b/testing/raptor/raptor/tests/benchmarks/jetstream3-mobile.toml @@ -1,5 +1,6 @@ [default] alert_threshold = 2.0 +subtest_alert_threshold = 5.0 apps = "chrome-m, cstm-car-m, fenix" gecko_profile_interval = 1 expose_browser_profiler = true