tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 8cd5d52a46f898a44589239a49ec90aec22a6e18
parent 5ef169b4891fc4c7947f0064f63678ec205b19c7
Author: Erik Nordin <enordin@mozilla.com>
Date:   Tue, 28 Oct 2025 18:46:18 +0000

Bug 1861698 - Add translations langId telemetry r=translations-reviewers,gregtatum

This patch adds a new Translations telemetry event
for page-language identification, so that we can
measure the aggregate performance of our language
detection code.

Differential Revision: https://phabricator.services.mozilla.com/D269461

Diffstat:
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_auto_translate.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_basics.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_open_panel.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer_settings.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_retranslate.js | 18 ++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_switch_languages.js | 36 ++++++++++++++++++++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_failure.js | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_request.js | 36 ++++++++++++++++++++++++++++++++++++
M browser/components/translations/tests/browser/browser_translations_full_page_telemetry_unsupported_lang.js | 7 +++++++
M toolkit/components/translations/TranslationsTelemetry.sys.mjs | 43 +++++++++++++++++++++++++++++++++++++++++++
M toolkit/components/translations/actors/TranslationsChild.sys.mjs | 4 +++-
M toolkit/components/translations/actors/TranslationsParent.sys.mjs | 30 ++++++++++++++++++++++++++++--
M toolkit/components/translations/metrics.yaml | 43 +++++++++++++++++++++++++++++++++++++++++++
M toolkit/components/translations/tests/browser/shared-head.js | 4 ++--
15 files changed, 378 insertions(+), 5 deletions(-)

diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_auto_translate.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_auto_translate.js @@ -123,5 +123,23 @@ add_task(async function test_translations_telemetry_auto_translate() { expectedEventCount: 1, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_basics.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_basics.js @@ -89,5 +89,23 @@ add_task(async function test_translations_telemetry_basics() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_open_panel.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_open_panel.js @@ -89,5 +89,23 @@ add_task(async function test_translations_telemetry_open_panel() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + 
expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer.js @@ -83,5 +83,23 @@ add_task(async function test_translations_panel_auto_offer() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 3, + assertForAllEvents: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer_settings.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_panel_auto_offer_settings.js @@ -116,5 +116,23 @@ add_task(async function test_translations_panel_auto_offer_settings() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git 
a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_retranslate.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_retranslate.js @@ -160,5 +160,23 @@ add_task(async function test_translations_telemetry_retranslate() { expectedEventCount: 2, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_switch_languages.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_switch_languages.js @@ -95,6 +95,24 @@ add_task(async function test_translations_telemetry_switch_from_language() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); @@ -188,5 +206,23 @@ add_task(async function test_translations_telemetry_switch_to_language() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + 
extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_failure.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_failure.js @@ -25,6 +25,24 @@ add_task( await FullPageTranslationsTestUtils.assertPageIsNotTranslated(runInPage); await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + + await TestTranslationsTelemetry.assertEvent( Glean.translations.translationRequest, { expectedEventCount: 0, @@ -110,6 +128,24 @@ add_task( expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); } ); @@ -132,6 +168,24 @@ add_task(async function test_translations_telemetry_auto_translation_failure() { await FullPageTranslationsTestUtils.assertPageIsNotTranslated(runInPage); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 
2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await TestTranslationsTelemetry.assertEvent(Glean.translationsPanel.open, { expectedEventCount: 1, expectNewFlowId: true, @@ -193,5 +247,23 @@ add_task(async function test_translations_telemetry_auto_translation_failure() { expectedEventCount: 0, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_request.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_translation_request.js @@ -90,6 +90,24 @@ add_task(async function test_translations_telemetry_manual_translation() { expectedEventCount: 1, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); @@ -153,5 +171,23 @@ add_task(async function test_translations_telemetry_auto_translation() { expectedEventCount: 1, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 1, + assertForMostRecentEvent: { + html_lang_attribute: "es", + identified_language: "es", + lang_tags_match: true, + 
is_lang_attribute_valid: true, + extracted_code_units: 2132, + extraction_time: ms => 0 < ms, + identification_time: ms => 0 < ms, + total_time: ms => 0 < ms, + confident: true, + }, + } + ); + await cleanup(); }); diff --git a/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_unsupported_lang.js b/browser/components/translations/tests/browser/browser_translations_full_page_telemetry_unsupported_lang.js @@ -208,5 +208,12 @@ add_task(async function test_translations_telemetry_unsupported_lang() { expectedEventCount: 1, }); + await TestTranslationsTelemetry.assertEvent( + Glean.translations.identifyPageLanguage, + { + expectedEventCount: 0, + } + ); + await cleanup(); }); diff --git a/toolkit/components/translations/TranslationsTelemetry.sys.mjs b/toolkit/components/translations/TranslationsTelemetry.sys.mjs @@ -89,6 +89,49 @@ export class TranslationsTelemetry { } /** + * Records a telemetry event when the language of a page is identified for Translations. 
+ * + * @param {object} data + * @param {string | null} [data.htmlLangAttribute] + * @param {string} data.identifiedLanguage + * @param {boolean | null} [data.langTagsMatch] + * @param {boolean | null} [data.isLangAttributeValid] + * @param {number} data.extractedCodeUnits + * @param {number} data.extractionTime + * @param {number} data.identificationTime + * @param {number} data.totalTime + * @param {boolean} data.confident + */ + static onIdentifyPageLanguage(data) { + const { + htmlLangAttribute, + identifiedLanguage, + langTagsMatch, + isLangAttributeValid, + extractedCodeUnits, + extractionTime, + identificationTime, + totalTime, + confident, + } = data; + Glean.translations.identifyPageLanguage.record({ + html_lang_attribute: htmlLangAttribute, + identified_language: identifiedLanguage, + lang_tags_match: langTagsMatch, + is_lang_attribute_valid: isLangAttributeValid, + extracted_code_units: extractedCodeUnits, + extraction_time: extractionTime, + identification_time: identificationTime, + total_time: totalTime, + confident, + }); + TranslationsTelemetry.logEventToConsole( + TranslationsTelemetry.onIdentifyPageLanguage, + data + ); + } + + /** * Records a telemetry event when full page translation fails. 
* * @param {string} errorMessage diff --git a/toolkit/components/translations/actors/TranslationsChild.sys.mjs b/toolkit/components/translations/actors/TranslationsChild.sys.mjs @@ -93,8 +93,10 @@ export class TranslationsChild extends JSWindowActorChild { Ci.nsIDocumentEncoder.OutputBodyOnly | Ci.nsIDocumentEncoder.SkipInvisibleContent | Ci.nsIDocumentEncoder.AllowCrossShadowBoundary | + Ci.nsIDocumentEncoder.OutputForPlainTextClipboardCopy | + Ci.nsIDocumentEncoder.OutputDisallowLineBreaking | Ci.nsIDocumentEncoder.OutputDropInvisibleBreak | - Ci.nsIDocumentEncoder.OutputDisallowLineBreaking + Ci.nsIDocumentEncoder.OutputLFLineBreak ); return encoder.encodeToStringWithMaxLength(sufficientLength); diff --git a/toolkit/components/translations/actors/TranslationsParent.sys.mjs b/toolkit/components/translations/actors/TranslationsParent.sys.mjs @@ -3578,6 +3578,8 @@ export class TranslationsParent extends JSWindowActorParent { return { language: "en", confident: false, languages: [] }; } + const extractionTime = ChromeUtils.now() - extractionStartTime; + lazy.console.debug( `Extracted Page Text (${pageText.length} code units):\n\n`, pageText @@ -3585,7 +3587,7 @@ export class TranslationsParent extends JSWindowActorParent { const extractionLog = `Extracted ${pageText.length} code units of text in ` + - `${(ChromeUtils.now() - extractionStartTime).toFixed(3)} ms.`; + `${extractionTime.toFixed(3)} ms.`; lazy.console.log(extractionLog); ChromeUtils.addProfilerMarker( @@ -3601,9 +3603,10 @@ export class TranslationsParent extends JSWindowActorParent { return { language: "en", confident: false, languages: [] }; } + const identificationTime = ChromeUtils.now() - identificationStartTime; const identificationLog = `Identified ${pageText.length} code units of text as "${result.language}" ` + - `in ${(ChromeUtils.now() - identificationStartTime).toFixed(3)} ms.`; + `in ${identificationTime.toFixed(3)} ms.`; lazy.console.log(identificationLog); ChromeUtils.addProfilerMarker( @@ 
-3621,6 +3624,29 @@ export class TranslationsParent extends JSWindowActorParent { result.confident = false; } + const htmlLangAttribute = + this.languageState?.detectedLanguages?.htmlLangAttribute ?? null; + const identifiedLanguage = result.language; + + TranslationsParent.telemetry().onIdentifyPageLanguage({ + htmlLangAttribute, + identifiedLanguage, + langTagsMatch: htmlLangAttribute + ? lazy.TranslationsUtils.langTagsMatch( + htmlLangAttribute, + identifiedLanguage + ) + : null, + isLangAttributeValid: htmlLangAttribute + ? lazy.TranslationsUtils.isLangTagValid(htmlLangAttribute) + : null, + extractedCodeUnits: pageText.length, + extractionTime, + identificationTime, + totalTime: extractionTime + identificationTime, + confident: result.confident, + }); + return result; } diff --git a/toolkit/components/translations/metrics.yaml b/toolkit/components/translations/metrics.yaml @@ -55,6 +55,49 @@ translations: - translations-telemetry-alerts@mozilla.com expires: never + identify_page_language: + type: event + description: > + Triggers when the language of a page is identified for Translations. + extra_keys: + html_lang_attribute: + type: string + description: The language tag of the page specified within the HTML. + identified_language: + type: string + description: The language tag that was identified via detection. + lang_tags_match: + type: boolean + description: Whether the identified language matches the HTML lang attribute. + is_lang_attribute_valid: + type: boolean + description: Whether the language tag of the page is a valid language tag. + extracted_code_units: + type: quantity + description: The length of the page text sample that was used for detection. + extraction_time: + type: quantity + description: The time it took to extract a sample of page text, in ms. + identification_time: + type: quantity + description: The time it took to identify a language from the extracted text, in ms. 
+ total_time: + type: quantity + description: The total time it took to extract page text and identify it, in ms. + confident: + type: boolean + description: Whether the language detection was confident in the result. + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1861698 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1861698 + data_sensitivity: + - interaction + notification_emails: + - translations-telemetry-alerts@mozilla.com + expires: never + + translation_request: type: event description: > diff --git a/toolkit/components/translations/tests/browser/shared-head.js b/toolkit/components/translations/tests/browser/shared-head.js @@ -2522,7 +2522,7 @@ class TestTranslationsTelemetry { if (typeof expected === "function") { ok( expected(event.extra[key]), - `Telemetry event ${name} value for ${key} should match the expected predicate` + `Telemetry event ${name} value for ${key} should match the expected predicate: got ${event.extra[key]}` ); } else { is( @@ -2545,7 +2545,7 @@ class TestTranslationsTelemetry { if (typeof expected === "function") { ok( expected(events[eventCount - 1].extra[key]), - `Telemetry event ${name} value for ${key} should match the expected predicate` + `Telemetry event ${name} value for ${key} should match the expected predicate: got ${events[eventCount - 1].extra[key]}` ); } else { is(